From 5c7dae71733610e2ec6fc73d3158fdf8e389c5f8 Mon Sep 17 00:00:00 2001 From: sgolebiewski-intel Date: Wed, 23 Oct 2024 14:54:01 +0200 Subject: [PATCH] Updating notebooks section --- docs/nbdoc/consts.py | 2 +- .../3D-pose-estimation-with-output.rst | 212 +-- ...-segmentation-point-clouds-with-output.rst | 14 +- .../action-recognition-webcam-with-output.rst | 9 +- ...on-recognition-webcam-with-output_22_0.png | 3 - docs/notebooks/all_notebooks_paths.txt | 6 +- ...-lightweight-text-to-image-with-output.rst | 52 +- docs/notebooks/animate-anyone-with-output.rst | 270 ++-- docs/notebooks/async-api-with-output.rst | 15 +- .../async-api-with-output_23_0.png | 4 +- docs/notebooks/auto-device-with-output.rst | 59 +- .../auto-device-with-output_27_0.png | 4 +- .../auto-device-with-output_28_0.png | 4 +- ...p-zero-shot-classification-with-output.rst | 10 +- ...ontrolnet-stable-diffusion-with-output.rst | 2 +- .../convert-to-openvino-with-output.rst | 101 +- .../convnext-classification-with-output.rst | 2 +- ...ss-lingual-books-alignment-with-output.rst | 9 +- ...segmentation-quantize-nncf-with-output.rst | 82 +- ...ntation-quantize-nncf-with-output_37_1.png | 4 +- ...ddcolor-image-colorization-with-output.rst | 166 +- .../depth-anything-v2-with-output.rst | 49 +- docs/notebooks/depth-anything-with-output.rst | 36 +- .../detectron2-to-openvino-with-output.rst | 145 +- ...etectron2-to-openvino-with-output_22_0.jpg | 4 +- ...etectron2-to-openvino-with-output_22_0.png | 4 +- ...etectron2-to-openvino-with-output_32_0.jpg | 4 +- ...etectron2-to-openvino-with-output_32_0.png | 4 +- .../distil-whisper-asr-with-output.rst | 4 +- ...rt-sequence-classification-with-output.rst | 56 +- ...ly-2-instruction-following-with-output.rst | 6 +- ...micrafter-animating-images-with-output.rst | 90 +- docs/notebooks/efficient-sam-with-output.rst | 80 +- .../efficient-sam-with-output_17_1.png | 4 +- .../efficient-sam-with-output_25_1.png | 4 +- .../efficient-sam-with-output_36_1.png | 4 +- .../encodec-audio-compression-with-output.rst | 18 +- .../explainable-ai-1-basic-with-output.rst | 58 +- ...xplainable-ai-1-basic-with-output_11_1.png | 3 - ...xplainable-ai-1-basic-with-output_11_2.png | 3 + ...xplainable-ai-1-basic-with-output_19_0.png | 3 + ...xplainable-ai-1-basic-with-output_19_1.png | 3 - ...explainable-ai-2-deep-dive-with-output.rst | 364 +++-- ...inable-ai-2-deep-dive-with-output_10_1.png | 3 - ...inable-ai-2-deep-dive-with-output_11_2.png | 3 + ...inable-ai-2-deep-dive-with-output_21_1.png | 3 - ...inable-ai-2-deep-dive-with-output_22_1.png | 3 + ...inable-ai-2-deep-dive-with-output_24_0.png | 3 - ...inable-ai-2-deep-dive-with-output_25_1.png | 3 + ...inable-ai-2-deep-dive-with-output_32_1.png | 3 - ...inable-ai-2-deep-dive-with-output_33_0.png | 3 - ...inable-ai-2-deep-dive-with-output_34_0.png | 3 - ...inable-ai-2-deep-dive-with-output_34_1.png | 3 + ...inable-ai-2-deep-dive-with-output_35_0.png | 3 + ...inable-ai-2-deep-dive-with-output_36_0.png | 3 + ...inable-ai-2-deep-dive-with-output_45_0.png | 3 - ...inable-ai-2-deep-dive-with-output_49_0.png | 3 + ...inable-ai-2-deep-dive-with-output_52_0.png | 3 + ...inable-ai-2-deep-dive-with-output_57_1.png | 3 - ...inable-ai-2-deep-dive-with-output_63_1.png | 3 + ...le-ai-3-map-interpretation-with-output.rst | 150 +- ...-3-map-interpretation-with-output_50_0.png | 4 +- ...-3-map-interpretation-with-output_53_0.png | 4 +- ...-3-map-interpretation-with-output_56_0.png | 4 +- ...-3-map-interpretation-with-output_59_0.png | 4 +- .../fast-segment-anything-with-output.rst | 
30 +- docs/notebooks/florence2-with-output.rst | 42 +- .../flux.1-image-generation-with-output.rst | 245 +-- ...ux.1-image-generation-with-output_20_1.jpg | 4 +- ...ux.1-image-generation-with-output_20_1.png | 4 +- .../freevc-voice-conversion-with-output.rst | 32 +- .../grounded-segment-anything-with-output.rst | 15 +- .../notebooks/handwritten-ocr-with-output.rst | 12 +- .../notebooks/hello-detection-with-output.rst | 2 +- .../hello-segmentation-with-output.rst | 14 +- docs/notebooks/hello-world-with-output.rst | 14 +- .../hugging-face-hub-with-output.rst | 57 +- docs/notebooks/image-bind-with-output.rst | 2 +- ...lassification-quantization-with-output.rst | 54 +- docs/notebooks/instant-id-with-output.rst | 2 +- ...ruct-pix2pix-image-editing-with-output.rst | 9 +- docs/notebooks/internvl2-with-output.rst | 28 +- docs/notebooks/jina-clip-with-output.rst | 62 +- .../knowledge-graphs-conve-with-output.rst | 26 +- ...modal-large-language-model-with-output.rst | 32 +- ...-large-language-model-with-output_29_1.jpg | 4 +- ...-large-language-model-with-output_29_1.png | 4 +- ...-large-language-model-with-output_48_1.png | 4 +- ...l-large-language-model-with-output_8_0.jpg | 4 +- ...l-large-language-model-with-output_8_0.png | 4 +- .../language-quantize-bert-with-output.rst | 64 +- ...stency-models-optimum-demo-with-output.rst | 54 +- ...y-models-optimum-demo-with-output_15_1.jpg | 4 +- ...y-models-optimum-demo-with-output_15_1.png | 4 +- ...cy-models-optimum-demo-with-output_8_1.jpg | 4 +- ...cy-models-optimum-demo-with-output_8_1.png | 4 +- .../lcm-lora-controlnet-with-output.rst | 2 +- ...a-multimodal-chatbot-genai-with-output.rst | 509 ++++++ ...timodal-chatbot-genai-with-output_21_0.jpg | 3 + ...timodal-chatbot-genai-with-output_21_0.png | 3 + ...multimodal-chatbot-optimum-with-output.rst | 508 ++++++ ...modal-chatbot-optimum-with-output_20_0.jpg | 3 + ...modal-chatbot-optimum-with-output_20_0.png | 3 + .../llava-multimodal-chatbot-with-output.rst | 1342 ---------------- ...va-multimodal-chatbot-with-output_20_1.jpg | 3 - ...va-multimodal-chatbot-with-output_20_1.png | 3 - ...lm-agent-functioncall-qwen-with-output.rst | 2 +- .../notebooks/llm-agent-react-with-output.rst | 560 +++++++ .../llm-chatbot-generate-api-with-output.rst | 135 +- docs/notebooks/llm-chatbot-with-output.rst | 183 +-- .../llm-rag-langchain-with-output.rst | 3 +- ...a-content-type-recognition-with-output.rst | 1 - docs/notebooks/meter-reader-with-output.rst | 12 +- ...nicpm-v-multimodal-chatbot-with-output.rst | 58 +- .../mobileclip-video-search-with-output.rst | 173 +- ...bilevlm-language-assistant-with-output.rst | 52 +- docs/notebooks/model-server-with-output.rst | 9 +- .../music-generation-with-output.rst | 20 +- ...o-llava-multimodal-chatbot-with-output.rst | 1387 +++++++---------- ...a-multimodal-chatbot-with-output_22_0.jpg} | 0 ...a-multimodal-chatbot-with-output_22_0.png} | 0 .../object-detection-with-output.rst | 1 - .../object-detection-with-output_19_0.png | 4 +- .../oneformer-segmentation-with-output.rst | 9 +- docs/notebooks/openvino-api-with-output.rst | 20 +- docs/notebooks/openvoice-with-output.rst | 103 +- ...ical-character-recognition-with-output.rst | 19 +- .../optimize-preprocessing-with-output.rst | 19 +- .../paddle-ocr-webcam-with-output.rst | 4 +- .../paddle-ocr-webcam-with-output_30_0.png | 4 +- ...to-openvino-classification-with-output.rst | 20 +- .../paint-by-example-with-output.rst | 2 +- .../parler-tts-text-to-speech-with-output.rst | 21 +- .../notebooks/person-tracking-with-output.rst | 12 +- 
.../person-tracking-with-output_25_0.png | 4 +- docs/notebooks/phi-3-vision-with-output.rst | 47 +- docs/notebooks/photo-maker-with-output.rst | 90 +- docs/notebooks/pixart-with-output.rst | 77 +- docs/notebooks/pixtral-with-output.rst | 90 +- .../pose-estimation-with-output_22_0.png | 4 +- .../pytorch-onnx-to-openvino-with-output.rst | 8 +- ...training-quantization-nncf-with-output.rst | 104 +- ...uantization-aware-training-with-output.rst | 78 +- ...on-sparsity-aware-training-with-output.rst | 360 ++--- .../pytorch-to-openvino-with-output.rst | 14 +- docs/notebooks/qwen2-audio-with-output.rst | 93 +- docs/notebooks/qwen2-vl-with-output.rst | 95 +- .../rmbg-background-removal-with-output.rst | 2 +- ...ce-text-to-video-retrieval-with-output.rst | 94 +- .../segment-anything-2-image-with-output.rst | 1229 +++++++++++++++ ...ment-anything-2-image-with-output_12_0.png | 3 + ...ment-anything-2-image-with-output_16_0.png | 3 + ...ment-anything-2-image-with-output_18_0.png | 3 + ...ment-anything-2-image-with-output_33_0.png | 3 + ...ment-anything-2-image-with-output_39_0.png | 3 + ...ment-anything-2-image-with-output_47_0.png | 3 + ...ment-anything-2-image-with-output_51_0.png | 3 + ...ment-anything-2-image-with-output_56_0.png | 3 + ...ment-anything-2-image-with-output_60_0.png | 3 + ...ment-anything-2-image-with-output_65_0.png | 3 + ...ment-anything-2-image-with-output_77_1.jpg | 3 + ...ment-anything-2-image-with-output_77_1.png | 3 + ...-shot-image-classification-with-output.rst | 12 +- ...-image-classification-with-output_24_1.png | 2 +- ...tch-to-image-pix2pix-turbo-with-output.rst | 114 +- ...o-image-pix2pix-turbo-with-output_18_0.jpg | 4 +- ...o-image-pix2pix-turbo-with-output_18_0.png | 4 +- .../sparsity-optimization-with-output.rst | 218 --- .../speculative-sampling-with-output.rst | 83 +- ...tion-quantization-wav2vec2-with-output.rst | 168 +- ...hbrain-emotion-recognition-with-output.rst | 11 +- ...e-cascade-image-generation-with-output.rst | 77 +- ...cade-image-generation-with-output_29_2.jpg | 4 +- ...cade-image-generation-with-output_29_2.png | 4 +- ...table-diffusion-ip-adapter-with-output.rst | 45 +- ...-diffusion-ip-adapter-with-output_22_1.png | 4 +- ...-diffusion-ip-adapter-with-output_25_0.png | 4 +- ...-diffusion-ip-adapter-with-output_28_0.png | 4 +- ...fusion-torchdynamo-backend-with-output.rst | 10 +- ...-diffusion-v2-optimum-demo-with-output.rst | 35 +- ...sion-v2-optimum-demo-with-output_13_1.jpg} | 0 ...sion-v2-optimum-demo-with-output_13_1.png} | 0 .../stable-diffusion-xl-with-output.rst | 410 +---- .../stable-diffusion-xl-with-output_12_1.jpg | 3 + .../stable-diffusion-xl-with-output_12_1.png | 3 + .../stable-diffusion-xl-with-output_13_1.jpg | 3 - .../stable-diffusion-xl-with-output_13_1.png | 3 - .../stable-diffusion-xl-with-output_20_1.jpg | 3 + .../stable-diffusion-xl-with-output_20_1.png | 3 + .../stable-diffusion-xl-with-output_21_1.jpg | 3 - .../stable-diffusion-xl-with-output_21_1.png | 3 - .../stable-diffusion-xl-with-output_35_1.jpg | 3 - .../stable-diffusion-xl-with-output_35_1.png | 3 - docs/notebooks/stable-fast-3d-with-output.rst | 11 +- docs/notebooks/style-transfer-with-output.rst | 2 +- .../style-transfer-with-output_25_0.png | 4 +- .../table-question-answering-with-output.rst | 58 +- ...fication-nncf-quantization-with-output.rst | 573 ------- ...ion-nncf-quantization-with-output_10_1.png | 3 - ...ion-nncf-quantization-with-output_27_1.png | 3 - ...tion-nncf-quantization-with-output_9_1.png | 3 - ...classification-to-openvino-with-output.rst | 2 +- 
...e-segmentation-to-openvino-with-output.rst | 2 +- ...mentation-to-openvino-with-output_39_0.png | 4 +- ...ject-detection-to-openvino-with-output.rst | 8 +- ...detection-to-openvino-with-output_38_0.png | 4 +- ...uantization-aware-training-with-output.rst | 54 +- ...tflite-selfie-segmentation-with-output.rst | 2 +- ...e-selfie-segmentation-with-output_33_0.png | 4 +- .../tflite-to-openvino-with-output.rst | 26 +- .../triposr-3d-reconstruction-with-output.rst | 476 ------ docs/notebooks/typo-detector-with-output.rst | 27 +- ...eollava-multimodal-chatbot-with-output.rst | 1134 -------------- ...va-multimodal-chatbot-with-output_18_3.png | 3 - .../vision-background-removal-with-output.rst | 91 +- .../vision-monodepth-with-output.rst | 6 +- docs/notebooks/wav2lip-with-output.rst | 88 +- .../whisper-asr-genai-with-output.rst | 356 ++--- ...isper-subtitles-generation-with-output.rst | 632 +++----- ...uerstchen-image-generation-with-output.rst | 4 +- ...ov11-instance-segmentation-with-output.rst | 74 +- ...instance-segmentation-with-output_46_0.png | 4 +- ...yolov11-keypoint-detection-with-output.rst | 80 +- ...11-keypoint-detection-with-output_43_0.png | 4 +- .../yolov11-object-detection-with-output.rst | 76 +- ...ov11-object-detection-with-output_43_0.png | 4 +- docs/notebooks/yolov8-obb-with-output.rst | 1 + .../yolov9-optimization-with-output.rst | 81 +- .../yolov9-optimization-with-output_36_0.png | 4 +- 229 files changed, 6713 insertions(+), 9108 deletions(-) delete mode 100644 docs/notebooks/action-recognition-webcam-with-output_files/action-recognition-webcam-with-output_22_0.png delete mode 100644 docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_11_1.png create mode 100644 docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_11_2.png create mode 100644 docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_19_0.png delete mode 100644 docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_19_1.png delete mode 100644 docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_10_1.png create mode 100644 docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_11_2.png delete mode 100644 docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_21_1.png create mode 100644 docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_22_1.png delete mode 100644 docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_24_0.png create mode 100644 docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_25_1.png delete mode 100644 docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_32_1.png delete mode 100644 docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_33_0.png delete mode 100644 docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_34_0.png create mode 100644 docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_34_1.png create mode 100644 docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_35_0.png create mode 100644 
docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_36_0.png delete mode 100644 docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_45_0.png create mode 100644 docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_49_0.png create mode 100644 docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_52_0.png delete mode 100644 docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_57_1.png create mode 100644 docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_63_1.png create mode 100644 docs/notebooks/llava-multimodal-chatbot-genai-with-output.rst create mode 100644 docs/notebooks/llava-multimodal-chatbot-genai-with-output_files/llava-multimodal-chatbot-genai-with-output_21_0.jpg create mode 100644 docs/notebooks/llava-multimodal-chatbot-genai-with-output_files/llava-multimodal-chatbot-genai-with-output_21_0.png create mode 100644 docs/notebooks/llava-multimodal-chatbot-optimum-with-output.rst create mode 100644 docs/notebooks/llava-multimodal-chatbot-optimum-with-output_files/llava-multimodal-chatbot-optimum-with-output_20_0.jpg create mode 100644 docs/notebooks/llava-multimodal-chatbot-optimum-with-output_files/llava-multimodal-chatbot-optimum-with-output_20_0.png delete mode 100644 docs/notebooks/llava-multimodal-chatbot-with-output.rst delete mode 100644 docs/notebooks/llava-multimodal-chatbot-with-output_files/llava-multimodal-chatbot-with-output_20_1.jpg delete mode 100644 docs/notebooks/llava-multimodal-chatbot-with-output_files/llava-multimodal-chatbot-with-output_20_1.png create mode 100644 docs/notebooks/llm-agent-react-with-output.rst rename docs/notebooks/nano-llava-multimodal-chatbot-with-output_files/{nano-llava-multimodal-chatbot-with-output_7_1.jpg => nano-llava-multimodal-chatbot-with-output_22_0.jpg} (100%) rename docs/notebooks/nano-llava-multimodal-chatbot-with-output_files/{nano-llava-multimodal-chatbot-with-output_7_1.png => nano-llava-multimodal-chatbot-with-output_22_0.png} (100%) create mode 100644 docs/notebooks/segment-anything-2-image-with-output.rst create mode 100644 docs/notebooks/segment-anything-2-image-with-output_files/segment-anything-2-image-with-output_12_0.png create mode 100644 docs/notebooks/segment-anything-2-image-with-output_files/segment-anything-2-image-with-output_16_0.png create mode 100644 docs/notebooks/segment-anything-2-image-with-output_files/segment-anything-2-image-with-output_18_0.png create mode 100644 docs/notebooks/segment-anything-2-image-with-output_files/segment-anything-2-image-with-output_33_0.png create mode 100644 docs/notebooks/segment-anything-2-image-with-output_files/segment-anything-2-image-with-output_39_0.png create mode 100644 docs/notebooks/segment-anything-2-image-with-output_files/segment-anything-2-image-with-output_47_0.png create mode 100644 docs/notebooks/segment-anything-2-image-with-output_files/segment-anything-2-image-with-output_51_0.png create mode 100644 docs/notebooks/segment-anything-2-image-with-output_files/segment-anything-2-image-with-output_56_0.png create mode 100644 docs/notebooks/segment-anything-2-image-with-output_files/segment-anything-2-image-with-output_60_0.png create mode 100644 docs/notebooks/segment-anything-2-image-with-output_files/segment-anything-2-image-with-output_65_0.png create mode 100644 
docs/notebooks/segment-anything-2-image-with-output_files/segment-anything-2-image-with-output_77_1.jpg create mode 100644 docs/notebooks/segment-anything-2-image-with-output_files/segment-anything-2-image-with-output_77_1.png rename docs/notebooks/stable-diffusion-v2-optimum-demo-with-output_files/{stable-diffusion-v2-optimum-demo-with-output_11_1.jpg => stable-diffusion-v2-optimum-demo-with-output_13_1.jpg} (100%) rename docs/notebooks/stable-diffusion-v2-optimum-demo-with-output_files/{stable-diffusion-v2-optimum-demo-with-output_11_1.png => stable-diffusion-v2-optimum-demo-with-output_13_1.png} (100%) create mode 100644 docs/notebooks/stable-diffusion-xl-with-output_files/stable-diffusion-xl-with-output_12_1.jpg create mode 100644 docs/notebooks/stable-diffusion-xl-with-output_files/stable-diffusion-xl-with-output_12_1.png delete mode 100644 docs/notebooks/stable-diffusion-xl-with-output_files/stable-diffusion-xl-with-output_13_1.jpg delete mode 100644 docs/notebooks/stable-diffusion-xl-with-output_files/stable-diffusion-xl-with-output_13_1.png create mode 100644 docs/notebooks/stable-diffusion-xl-with-output_files/stable-diffusion-xl-with-output_20_1.jpg create mode 100644 docs/notebooks/stable-diffusion-xl-with-output_files/stable-diffusion-xl-with-output_20_1.png delete mode 100644 docs/notebooks/stable-diffusion-xl-with-output_files/stable-diffusion-xl-with-output_21_1.jpg delete mode 100644 docs/notebooks/stable-diffusion-xl-with-output_files/stable-diffusion-xl-with-output_21_1.png delete mode 100644 docs/notebooks/stable-diffusion-xl-with-output_files/stable-diffusion-xl-with-output_35_1.jpg delete mode 100644 docs/notebooks/stable-diffusion-xl-with-output_files/stable-diffusion-xl-with-output_35_1.png delete mode 100644 docs/notebooks/tensorflow-bit-image-classification-nncf-quantization-with-output.rst delete mode 100644 docs/notebooks/tensorflow-bit-image-classification-nncf-quantization-with-output_files/tensorflow-bit-image-classification-nncf-quantization-with-output_10_1.png delete mode 100644 docs/notebooks/tensorflow-bit-image-classification-nncf-quantization-with-output_files/tensorflow-bit-image-classification-nncf-quantization-with-output_27_1.png delete mode 100644 docs/notebooks/tensorflow-bit-image-classification-nncf-quantization-with-output_files/tensorflow-bit-image-classification-nncf-quantization-with-output_9_1.png delete mode 100644 docs/notebooks/triposr-3d-reconstruction-with-output.rst delete mode 100644 docs/notebooks/videollava-multimodal-chatbot-with-output.rst delete mode 100644 docs/notebooks/videollava-multimodal-chatbot-with-output_files/videollava-multimodal-chatbot-with-output_18_3.png diff --git a/docs/nbdoc/consts.py b/docs/nbdoc/consts.py index c6965d054f0991..75c8111bafcc6b 100644 --- a/docs/nbdoc/consts.py +++ b/docs/nbdoc/consts.py @@ -6,7 +6,7 @@ repo_owner = "openvinotoolkit" repo_name = "openvino_notebooks" repo_branch = "tree/main" -artifacts_link = "http://repository.toolbox.iotg.sclab.intel.com/projects/ov-notebook/0.1.0-latest/20241007220823/dist/rst_files/" +artifacts_link = "http://repository.toolbox.iotg.sclab.intel.com/projects/ov-notebook/0.1.0-latest/20241022220806/dist/rst_files/" blacklisted_extensions = ['.xml', '.bin'] notebooks_repo = "https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/" notebooks_binder = "https://mybinder.org/v2/gh/openvinotoolkit/openvino_notebooks/HEAD?filepath=" diff --git a/docs/notebooks/3D-pose-estimation-with-output.rst b/docs/notebooks/3D-pose-estimation-with-output.rst index 
cdb1e4ff5083cd..e5e8e3813b5173 100644 --- a/docs/notebooks/3D-pose-estimation-with-output.rst +++ b/docs/notebooks/3D-pose-estimation-with-output.rst @@ -11,14 +11,14 @@ of this notebook, you will see live inference results from your webcam out the algorithms. **Make sure you have properly installed the**\ `Jupyter extension `__\ **and -been using JupyterLab to run the demo as suggested in the** -``README.md`` +been using JupyterLab to run the demo as suggested in the +``README.md``** **NOTE**: *To use a webcam, you must run this Jupyter notebook on a computer with a webcam. If you run on a remote server, the webcam will not work. However, you can still do inference on a video file in - the final step. This demo utilizes the Python interface in* - ``Three.js`` *integrated with WebGL to process data from the model + the final step. This demo utilizes the Python interface in + ``Three.js`` integrated with WebGL to process data from the model inference. These results are processed and displayed in the notebook.* @@ -87,7 +87,7 @@ Prerequisites -**The** ``pythreejs`` **extension may not display properly when using a +**The ``pythreejs`` extension may not display properly when using a Jupyter Notebook release. Therefore, it is recommended to use Jupyter Lab instead.** @@ -109,61 +109,61 @@ Lab instead.** Using cached https://download.pytorch.org/whl/cpu/torch-2.4.1%2Bcpu-cp38-cp38-linux_x86_64.whl (194.9 MB) Collecting onnx<1.16.2 Using cached onnx-1.16.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB) - Requirement already satisfied: ipywidgets>=7.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (8.1.5) + Requirement already satisfied: ipywidgets>=7.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (8.1.5) Collecting ipydatawidgets>=1.1.1 (from pythreejs) Using cached ipydatawidgets-4.3.5-py2.py3-none-any.whl.metadata (1.4 kB) Collecting numpy (from pythreejs) Using cached numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB) - Requirement already satisfied: traitlets in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (5.14.3) - Requirement already satisfied: defusedxml>=0.7.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (0.7.1) + Requirement already satisfied: traitlets in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (5.14.3) + Requirement already satisfied: defusedxml>=0.7.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (0.7.1) Collecting networkx<=3.1.0 (from openvino-dev>=2024.0.0) Using cached networkx-3.1-py3-none-any.whl.metadata (5.3 kB) Collecting openvino-telemetry>=2023.2.1 (from openvino-dev>=2024.0.0) Using cached openvino_telemetry-2024.1.0-py3-none-any.whl.metadata (2.3 kB) - Requirement already satisfied: packaging in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (24.1) - Requirement already satisfied: pyyaml>=5.4.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (6.0.2) - Requirement already satisfied: requests>=2.25.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (2.32.0) + Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (24.1) + Requirement already satisfied: pyyaml>=5.4.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (6.0.2) + Requirement already satisfied: requests>=2.25.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (2.32.0) Collecting openvino==2024.4.0 (from openvino-dev>=2024.0.0) Using cached openvino-2024.4.0-16579-cp38-cp38-manylinux2014_x86_64.whl.metadata (8.3 kB) Collecting filelock (from torch) Using cached filelock-3.16.1-py3-none-any.whl.metadata (2.9 kB) - Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (4.12.2) + Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (4.12.2) Collecting sympy (from torch) Using cached sympy-1.13.3-py3-none-any.whl.metadata (12 kB) - Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1.4) + Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1.4) Collecting fsspec (from torch) - Using cached fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB) + Using cached fsspec-2024.10.0-py3-none-any.whl.metadata (11 kB) Collecting protobuf>=3.20.2 (from onnx<1.16.2) Using cached protobuf-5.28.2-cp38-abi3-manylinux2014_x86_64.whl.metadata (592 bytes) Collecting traittypes>=0.2.0 (from ipydatawidgets>=1.1.1->pythreejs) Using cached traittypes-0.2.1-py2.py3-none-any.whl.metadata (1.0 kB) - Requirement already satisfied: comm>=0.1.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (0.2.2) - Requirement already satisfied: ipython>=6.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (8.12.3) - 
Requirement already satisfied: widgetsnbextension~=4.0.12 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (4.0.13) - Requirement already satisfied: jupyterlab-widgets~=3.0.12 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (3.0.13) - Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (3.3.2) - Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (3.10) - Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (2.2.3) - Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (2024.8.30) - Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch) (2.1.5) + Requirement already satisfied: comm>=0.1.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (0.2.2) + Requirement already satisfied: ipython>=6.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (8.12.3) + Requirement already satisfied: widgetsnbextension~=4.0.12 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (4.0.13) + Requirement already satisfied: jupyterlab-widgets~=3.0.12 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (3.0.13) + Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (3.4.0) + Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (3.10) + Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from 
requests>=2.25.1->openvino-dev>=2024.0.0) (2.2.3) + Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (2024.8.30) + Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch) (2.1.5) Collecting mpmath<1.4,>=1.1.0 (from sympy->torch) Using cached https://download.pytorch.org/whl/mpmath-1.3.0-py3-none-any.whl (536 kB) - Requirement already satisfied: backcall in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.0) - Requirement already satisfied: decorator in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (5.1.1) - Requirement already satisfied: jedi>=0.16 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.19.1) - Requirement already satisfied: matplotlib-inline in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.1.7) - Requirement already satisfied: pickleshare in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.5) - Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (3.0.48) - Requirement already satisfied: pygments>=2.4.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.18.0) - Requirement already satisfied: stack-data in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.6.3) - Requirement already satisfied: pexpect>4.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (4.9.0) - Requirement already satisfied: parso<0.9.0,>=0.8.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.8.4) - Requirement already satisfied: ptyprocess>=0.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from 
pexpect>4.3->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.0) - Requirement already satisfied: wcwidth in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.13) - Requirement already satisfied: executing>=1.2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.1.0) - Requirement already satisfied: asttokens>=2.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.4.1) - Requirement already satisfied: pure-eval in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.3) - Requirement already satisfied: six>=1.12.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from asttokens>=2.1.0->stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (1.16.0) + Requirement already satisfied: backcall in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.0) + Requirement already satisfied: decorator in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (5.1.1) + Requirement already satisfied: jedi>=0.16 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.19.1) + Requirement already satisfied: matplotlib-inline in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.1.7) + Requirement already satisfied: pickleshare in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.5) + Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (3.0.48) + Requirement already satisfied: pygments>=2.4.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.18.0) + Requirement already satisfied: stack-data in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from 
ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.6.3) + Requirement already satisfied: pexpect>4.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (4.9.0) + Requirement already satisfied: parso<0.9.0,>=0.8.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.8.4) + Requirement already satisfied: ptyprocess>=0.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pexpect>4.3->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.0) + Requirement already satisfied: wcwidth in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.13) + Requirement already satisfied: executing>=1.2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.1.0) + Requirement already satisfied: asttokens>=2.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.4.1) + Requirement already satisfied: pure-eval in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.3) + Requirement already satisfied: six>=1.12.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from asttokens>=2.1.0->stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (1.16.0) Using cached pythreejs-2.4.2-py3-none-any.whl (3.4 MB) Using cached openvino_dev-2024.4.0-16579-py3-none-any.whl (4.7 MB) Using cached openvino-2024.4.0-16579-cp38-cp38-manylinux2014_x86_64.whl (42.6 MB) @@ -175,11 +175,11 @@ Lab instead.** Using cached openvino_telemetry-2024.1.0-py3-none-any.whl (23 kB) Using cached protobuf-5.28.2-cp38-abi3-manylinux2014_x86_64.whl (316 kB) Using cached filelock-3.16.1-py3-none-any.whl (16 kB) - Using cached fsspec-2024.9.0-py3-none-any.whl (179 kB) + Using cached fsspec-2024.10.0-py3-none-any.whl (179 kB) Using cached sympy-1.13.3-py3-none-any.whl (6.2 MB) Using cached traittypes-0.2.1-py2.py3-none-any.whl (8.6 kB) Installing collected packages: openvino-telemetry, mpmath, traittypes, sympy, protobuf, numpy, networkx, fsspec, filelock, torch, openvino, opencv-python, onnx, openvino-dev, ipydatawidgets, pythreejs - Successfully installed filelock-3.16.1 fsspec-2024.9.0 ipydatawidgets-4.3.5 mpmath-1.3.0 networkx-3.1 numpy-1.24.4 onnx-1.16.1 opencv-python-4.10.0.84 openvino-2024.4.0 openvino-dev-2024.4.0 openvino-telemetry-2024.1.0 protobuf-5.28.2 pythreejs-2.4.2 sympy-1.13.3 torch-2.4.1+cpu traittypes-0.2.1 + Successfully installed filelock-3.16.1 fsspec-2024.10.0 ipydatawidgets-4.3.5 mpmath-1.3.0 networkx-3.1 numpy-1.24.4 
onnx-1.16.1 opencv-python-4.10.0.84 openvino-2024.4.0 openvino-dev-2024.4.0 openvino-telemetry-2024.1.0 protobuf-5.28.2 pythreejs-2.4.2 sympy-1.13.3 torch-2.4.1+cpu traittypes-0.2.1 Note: you may need to restart the kernel to use updated packages. @@ -193,28 +193,28 @@ Imports import collections import time from pathlib import Path - + import cv2 import ipywidgets as widgets import numpy as np from IPython.display import clear_output, display import openvino as ov - + # Fetch `notebook_utils` module import requests - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) with open("notebook_utils.py", "w") as f: f.write(r.text) - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/engine3js.py", ) with open("engine3js.py", "w") as f: f.write(r.text) - + import notebook_utils as utils import engine3js as engine @@ -236,19 +236,19 @@ directory structure and downloads the selected model. # directory where model will be downloaded base_model_dir = "model" - + # model name as named in Open Model Zoo model_name = "human-pose-estimation-3d-0001" # selected precision (FP32, FP16) precision = "FP32" - + BASE_MODEL_NAME = f"{base_model_dir}/public/{model_name}/{model_name}" model_path = Path(BASE_MODEL_NAME).with_suffix(".pth") onnx_path = Path(BASE_MODEL_NAME).with_suffix(".onnx") - + ir_model_path = Path(f"model/public/{model_name}/{precision}/{model_name}.xml") model_weights_path = Path(f"model/public/{model_name}/{precision}/{model_name}.bin") - + if not model_path.exists(): download_command = f"omz_downloader " f"--name {model_name} " f"--output_dir {base_model_dir}" ! $download_command @@ -257,12 +257,12 @@ directory structure and downloads the selected model. .. parsed-literal:: ################|| Downloading human-pose-estimation-3d-0001 ||################ - + ========== Downloading model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.tar.gz - - + + ========== Unpacking model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.tar.gz - + Convert Model to OpenVINO IR format @@ -287,22 +287,22 @@ IR format. .. parsed-literal:: ========== Converting human-pose-estimation-3d-0001 to ONNX - Conversion to ONNX command: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/omz_tools/internal_scripts/pytorch_to_onnx.py --model-path=model/public/human-pose-estimation-3d-0001 --model-name=PoseEstimationWithMobileNet --model-param=is_convertible_by_mo=True --import-module=model --weights=model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.pth --input-shape=1,3,256,448 --input-names=data --output-names=features,heatmaps,pafs --output-file=model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.onnx - - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/omz_tools/internal_scripts/pytorch_to_onnx.py:147: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. 
It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + Conversion to ONNX command: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/omz_tools/internal_scripts/pytorch_to_onnx.py --model-path=model/public/human-pose-estimation-3d-0001 --model-name=PoseEstimationWithMobileNet --model-param=is_convertible_by_mo=True --import-module=model --weights=model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.pth --input-shape=1,3,256,448 --input-names=data --output-names=features,heatmaps,pafs --output-file=model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.onnx + + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/omz_tools/internal_scripts/pytorch_to_onnx.py:147: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. model.load_state_dict(torch.load(weights, map_location='cpu')) ONNX check passed successfully. 
- + ========== Converting human-pose-estimation-3d-0001 to IR (FP32) - Conversion command: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/bin/mo --framework=onnx --output_dir=model/public/human-pose-estimation-3d-0001/FP32 --model_name=human-pose-estimation-3d-0001 --input=data '--mean_values=data[128.0,128.0,128.0]' '--scale_values=data[255.0,255.0,255.0]' --output=features,heatmaps,pafs --input_model=model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.onnx '--layout=data(NCHW)' '--input_shape=[1, 3, 256, 448]' --compress_to_fp16=False - + Conversion command: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/bin/mo --framework=onnx --output_dir=model/public/human-pose-estimation-3d-0001/FP32 --model_name=human-pose-estimation-3d-0001 --input=data '--mean_values=data[128.0,128.0,128.0]' '--scale_values=data[255.0,255.0,255.0]' --output=features,heatmaps,pafs --input_model=model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.onnx '--layout=data(NCHW)' '--input_shape=[1, 3, 256, 448]' --compress_to_fp16=False + [ INFO ] MO command line tool is considered as the legacy conversion API as of OpenVINO 2023.2 release. - In 2025.0 MO command line tool and openvino.tools.mo.convert_model() will be removed. Please use OpenVINO Model Converter (OVC) or openvino.convert_model(). OVC represents a lightweight alternative of MO and provides simplified model conversion API. + In 2025.0 MO command line tool and openvino.tools.mo.convert_model() will be removed. Please use OpenVINO Model Converter (OVC) or openvino.convert_model(). OVC represents a lightweight alternative of MO and provides simplified model conversion API. Find more information about transition from MO to OVC at https://docs.openvino.ai/2023.2/openvino_docs_OV_Converter_UG_prepare_model_convert_model_MO_OVC_transition.html [ SUCCESS ] Generated IR version 11 model. - [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/3D-pose-estimation-webcam/model/public/human-pose-estimation-3d-0001/FP32/human-pose-estimation-3d-0001.xml - [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/3D-pose-estimation-webcam/model/public/human-pose-estimation-3d-0001/FP32/human-pose-estimation-3d-0001.bin - + [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/3D-pose-estimation-webcam/model/public/human-pose-estimation-3d-0001/FP32/human-pose-estimation-3d-0001.xml + [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/3D-pose-estimation-webcam/model/public/human-pose-estimation-3d-0001/FP32/human-pose-estimation-3d-0001.bin + Select inference device @@ -315,7 +315,7 @@ select device from dropdown list for running inference using OpenVINO .. 
code:: ipython3 device = utils.device_widget() - + device @@ -350,7 +350,7 @@ created to infer the compiled model. compiled_model = core.compile_model(model=model, device_name=device.value) infer_request = compiled_model.create_infer_request() input_tensor_name = model.inputs[0].get_any_name() - + # get input and output names of nodes input_layer = compiled_model.input(0) output_layers = list(compiled_model.outputs) @@ -390,25 +390,25 @@ input for the 3D model. This is how you obtain the output heat maps, PAF def model_infer(scaled_img, stride): """ Run model inference on the input image - + Parameters: scaled_img: resized image according to the input size of the model stride: int, the stride of the window """ - + # Remove excess space from the picture img = scaled_img[ 0 : scaled_img.shape[0] - (scaled_img.shape[0] % stride), 0 : scaled_img.shape[1] - (scaled_img.shape[1] % stride), ] - + img = np.transpose(img, (2, 0, 1))[None,] infer_request.infer({input_tensor_name: img}) # A set of three inference results is obtained results = {name: infer_request.get_tensor(name).data[:] for name in {"features", "heatmaps", "pafs"}} # Get the results results = (results["features"][0], results["heatmaps"][0], results["pafs"][0]) - + return results Draw 2D Pose Overlays @@ -448,8 +448,8 @@ from Open Model Zoo. [13, 14], # neck - r_hip - r_knee - r_ankle ] ) - - + + body_edges_2d = np.array( [ [0, 1], # neck - nose @@ -471,25 +471,25 @@ from Open Model Zoo. [13, 14], # neck - r_hip - r_knee - r_ankle ] ) - - + + def draw_poses(frame, poses_2d, scaled_img, use_popup): """ Draw 2D pose overlays on the image to visualize estimated poses. Joints are drawn as circles and limbs are drawn as lines. - + :param frame: the input image :param poses_2d: array of human joint pairs """ for pose in poses_2d: pose = np.array(pose[0:-1]).reshape((-1, 3)).transpose() was_found = pose[2] > 0 - + pose[0], pose[1] = ( pose[0] * frame.shape[1] / scaled_img.shape[1], pose[1] * frame.shape[0] / scaled_img.shape[0], ) - + # Draw joints. for edge in body_edges_2d: if was_found[edge[0]] and was_found[edge[1]]: @@ -512,7 +512,7 @@ from Open Model Zoo. -1, cv2.LINE_AA, ) - + return frame Main Processing Function @@ -529,18 +529,18 @@ webcam feed or a video file. """ 2D image as input, using OpenVINO as inference backend, get joints 3D coordinates, and draw 3D human skeleton in the scene - + :param source: The webcam number to feed the video stream with primary webcam set to "0", or the video path. :param flip: To be used by VideoPlayer function for flipping capture image. :param use_popup: False for showing encoded frames over this notebook, True for creating a popup window. :param skip_frames: Number of frames to skip at the beginning of the video. """ - + focal_length = -1 # default stride = 8 player = None skeleton_set = None - + try: # create video player to play with target fps video_path # get the frame from camera @@ -548,16 +548,16 @@ webcam feed or a video file. 
player = utils.VideoPlayer(source, flip=flip, fps=30, skip_first_frames=skip_frames) # start capturing player.start() - + input_image = player.next() # set the window size resize_scale = 450 / input_image.shape[1] windows_width = int(input_image.shape[1] * resize_scale) windows_height = int(input_image.shape[0] * resize_scale) - + # use visualization library engine3D = engine.Engine3js(grid=True, axis=True, view_width=windows_width, view_height=windows_height) - + if use_popup: # display the 3D human pose in this notebook, and origin frame in popup window display(engine3D.renderer) @@ -567,43 +567,43 @@ webcam feed or a video file. # set the 2D image box, show both human pose and image in the notebook imgbox = widgets.Image(format="jpg", height=windows_height, width=windows_width) display(widgets.HBox([engine3D.renderer, imgbox])) - + skeleton = engine.Skeleton(body_edges=body_edges) - + processing_times = collections.deque() - + while True: # grab the frame frame = player.next() if frame is None: print("Source ended") break - + # resize image and change dims to fit neural network input # (see https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/human-pose-estimation-3d-0001) scaled_img = cv2.resize(frame, dsize=(model.inputs[0].shape[3], model.inputs[0].shape[2])) - + if focal_length < 0: # Focal length is unknown focal_length = np.float32(0.8 * scaled_img.shape[1]) - + # inference start start_time = time.time() # get results inference_result = model_infer(scaled_img, stride) - + # inference stop stop_time = time.time() processing_times.append(stop_time - start_time) # Process the point to point coordinates of the data poses_3d, poses_2d = engine.parse_poses(inference_result, 1, stride, focal_length, True) - + # use processing times from last 200 frames if len(processing_times) > 200: processing_times.popleft() - + processing_time = np.mean(processing_times) * 1000 fps = 1000 / processing_time - + if len(poses_3d) > 0: # From here, you can rotate the 3D point positions using the function "draw_poses", # or you can directly make the correct mapping below to properly display the object image on the screen @@ -616,28 +616,28 @@ webcam feed or a video file. -y + np.ones(poses_3d[:, 2::4].shape) * 100, -x, ) - + poses_3d = poses_3d.reshape(poses_3d.shape[0], 19, -1)[:, :, 0:3] people = skeleton(poses_3d=poses_3d) - + try: engine3D.scene_remove(skeleton_set) except Exception: pass - + engine3D.scene_add(people) skeleton_set = people - + # draw 2D frame = draw_poses(frame, poses_2d, scaled_img, use_popup) - + else: try: engine3D.scene_remove(skeleton_set) skeleton_set = None except Exception: pass - + cv2.putText( frame, f"Inference time: {processing_time:.1f}ms ({fps:.1f} FPS)", @@ -648,7 +648,7 @@ webcam feed or a video file. 1, cv2.LINE_AA, ) - + if use_popup: cv2.imshow(title, frame) key = cv2.waitKey(1) @@ -662,9 +662,9 @@ webcam feed or a video file. frame, params=[cv2.IMWRITE_JPEG_QUALITY, 90], )[1].tobytes() - + engine3D.renderer.render(engine3D.scene, engine3D.cam) - + except KeyboardInterrupt: print("Interrupted") except RuntimeError as e: @@ -711,10 +711,10 @@ picture on the left to interact. .. 
code:: ipython3 USE_WEBCAM = False - + cam_id = 0 video_path = "https://storage.openvinotoolkit.org/data/test_data/videos/face-demographics-walking.mp4" - + source = cam_id if USE_WEBCAM else video_path - + run_pose_estimation(source=source, flip=isinstance(source, int), use_popup=False) diff --git a/docs/notebooks/3D-segmentation-point-clouds-with-output.rst b/docs/notebooks/3D-segmentation-point-clouds-with-output.rst index ce27c3006ac36a..14107e6290f1c7 100644 --- a/docs/notebooks/3D-segmentation-point-clouds-with-output.rst +++ b/docs/notebooks/3D-segmentation-point-clouds-with-output.rst @@ -46,20 +46,12 @@ Guide =2023.1.0" "tqdm" - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "openvino>=2023.1.0" "tqdm" "matplotlib>=3.4" .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. Imports @@ -227,7 +219,7 @@ chair for example. .. parsed-literal:: - /tmp/ipykernel_59833/2434168836.py:12: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored + /tmp/ipykernel_2578096/2434168836.py:12: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored ax.scatter3D(X, Y, Z, s=5, cmap="jet", marker="o", label="chair") @@ -321,7 +313,7 @@ select device from dropdown list for running inference using OpenVINO .. parsed-literal:: - /tmp/ipykernel_59833/2804603389.py:23: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored + /tmp/ipykernel_2578096/2804603389.py:23: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored ax.scatter(XCur, YCur, ZCur, s=5, cmap="jet", marker="o", label=classes[i]) diff --git a/docs/notebooks/action-recognition-webcam-with-output.rst b/docs/notebooks/action-recognition-webcam-with-output.rst index 6f9ff9a062ace7..a074549965ea7c 100644 --- a/docs/notebooks/action-recognition-webcam-with-output.rst +++ b/docs/notebooks/action-recognition-webcam-with-output.rst @@ -683,11 +683,16 @@ multi-camera systems). run_action_recognition(source=source, use_popup=False, **additional_options) +.. parsed-literal:: -.. image:: action-recognition-webcam-with-output_files/action-recognition-webcam-with-output_22_0.png + Cannot open https://archive.org/serve/ISSVideoResourceLifeOnStation720p/ISS%20Video%20Resource_LifeOnStation_720p.mp4 .. 
parsed-literal:: - Source ended + [ WARN:0@6.113] global cap.cpp:164 open VIDEOIO(CV_IMAGES): raised OpenCV exception: + + OpenCV(4.10.0) /io/opencv/modules/videoio/src/cap_images.cpp:244: error: (-5:Bad argument) CAP_IMAGES: error, expected '0?[1-9][du]' pattern, got: https://archive.org/serve/ISSVideoResourceLifeOnStation720p/ISS%20Video%20Resource_LifeOnStation_720p.mp4 in function 'icvExtractPattern' + + diff --git a/docs/notebooks/action-recognition-webcam-with-output_files/action-recognition-webcam-with-output_22_0.png b/docs/notebooks/action-recognition-webcam-with-output_files/action-recognition-webcam-with-output_22_0.png deleted file mode 100644 index 2bb95f87f90515..00000000000000 --- a/docs/notebooks/action-recognition-webcam-with-output_files/action-recognition-webcam-with-output_22_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d9ed14955798d0fbcdc284da02f19a12ae92c89fe6c1f4760951a414f4047b66 -size 68016 diff --git a/docs/notebooks/all_notebooks_paths.txt b/docs/notebooks/all_notebooks_paths.txt index 62e4b205f45f75..9a907132b29c75 100644 --- a/docs/notebooks/all_notebooks_paths.txt +++ b/docs/notebooks/all_notebooks_paths.txt @@ -54,11 +54,12 @@ notebooks/language-quantize-bert/language-quantize-bert.ipynb notebooks/latent-consistency-models-image-generation/latent-consistency-models-image-generation.ipynb notebooks/latent-consistency-models-image-generation/latent-consistency-models-optimum-demo.ipynb notebooks/latent-consistency-models-image-generation/lcm-lora-controlnet.ipynb -notebooks/llava-multimodal-chatbot/llava-multimodal-chatbot.ipynb -notebooks/llava-multimodal-chatbot/videollava-multimodal-chatbot.ipynb +notebooks/llava-multimodal-chatbot/llava-multimodal-chatbot-genai.ipynb +notebooks/llava-multimodal-chatbot/llava-multimodal-chatbot-optimum.ipynb notebooks/llava-next-multimodal-chatbot/llava-next-multimodal-chatbot.ipynb notebooks/llm-agent-functioncall/llm-agent-functioncall-qwen.ipynb notebooks/llm-agent-react/llm-agent-rag-llamaindex.ipynb +notebooks/llm-agent-react/llm-agent-react.ipynb notebooks/llm-agent-react/llm-agent-react-langchain.ipynb notebooks/llm-chatbot/llm-chatbot-generate-api.ipynb notebooks/llm-chatbot/llm-chatbot.ipynb @@ -148,7 +149,6 @@ notebooks/tflite-selfie-segmentation/tflite-selfie-segmentation.ipynb notebooks/tflite-to-openvino/tflite-to-openvino.ipynb notebooks/tiny-sd-image-generation/tiny-sd-image-generation.ipynb notebooks/torchvision-zoo-to-openvino/convnext-classification.ipynb -notebooks/triposr-3d-reconstruction/triposr-3d-reconstruction.ipynb notebooks/typo-detector/typo-detector.ipynb notebooks/vehicle-detection-and-recognition/vehicle-detection-and-recognition.ipynb notebooks/vision-background-removal/vision-background-removal.ipynb diff --git a/docs/notebooks/amused-lightweight-text-to-image-with-output.rst b/docs/notebooks/amused-lightweight-text-to-image-with-output.rst index 880bf539025c5e..23e564e135d9ae 100644 --- a/docs/notebooks/amused-lightweight-text-to-image-with-output.rst +++ b/docs/notebooks/amused-lightweight-text-to-image-with-output.rst @@ -226,23 +226,23 @@ Convert the Text Encoder .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4779: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. 
Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4779: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:88: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:88: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if input_shape[-1] > 1 or self.sliding_window is not None: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:808: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:808: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
encoder_states = () if output_hidden_states else None - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:813: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:813: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if output_hidden_states: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:836: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:836: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if output_hidden_states: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:839: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:839: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if not return_dict: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:935: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:935: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if not return_dict: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:1426: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:1426: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if not return_dict: @@ -351,13 +351,13 @@ suitable. This function repeats part of ``AmusedPipeline``. .. parsed-literal:: - /tmp/ipykernel_60662/3779428577.py:34: TracerWarning: Converting a tensor to a Python list might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /tmp/ipykernel_2578393/3779428577.py:34: TracerWarning: Converting a tensor to a Python list might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! shape=shape.tolist(), - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/autoencoders/vq_model.py:144: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/autoencoders/vq_model.py:144: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if not force_not_quantize: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:147: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if hidden_states.shape[0] >= 64: @@ -490,7 +490,7 @@ And insert wrappers instances in the pipeline: .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -707,7 +707,7 @@ model. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. 
deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -760,17 +760,17 @@ model. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) @@ -794,7 +794,7 @@ Demo generation with quantized pipeline .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -878,7 +878,7 @@ a rough estimate of generation quality. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/utilities/prints.py:43: UserWarning: Metric `InceptionScore` will save all extracted features in buffer. For large datasets this may lead to large memory footprint. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/utilities/prints.py:43: UserWarning: Metric `InceptionScore` will save all extracted features in buffer. For large datasets this may lead to large memory footprint. warnings.warn(\*args, \*\*kwargs) # noqa: B028 @@ -890,7 +890,7 @@ a rough estimate of generation quality. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/image/inception.py:175: UserWarning: std(): degrees of freedom is <= 0. Correction should be strictly less than the reduction factor (input numel divided by output numel). (Triggered internally at ../aten/src/ATen/native/ReduceOps.cpp:1808.) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/image/inception.py:175: UserWarning: std(): degrees of freedom is <= 0. Correction should be strictly less than the reduction factor (input numel divided by output numel). (Triggered internally at ../aten/src/ATen/native/ReduceOps.cpp:1808.) return kl.mean(), kl.std() @@ -907,8 +907,8 @@ a rough estimate of generation quality. .. parsed-literal:: - Quantized pipeline Inception Score: 11.073053359985352 - Quantization speed-up: 2.07x + Quantized pipeline Inception Score: 11.0730562210083 + Quantization speed-up: 2.09x Interactive inference diff --git a/docs/notebooks/animate-anyone-with-output.rst b/docs/notebooks/animate-anyone-with-output.rst index 15459596dea5bf..a7debae86cef3f 100644 --- a/docs/notebooks/animate-anyone-with-output.rst +++ b/docs/notebooks/animate-anyone-with-output.rst @@ -1,7 +1,8 @@ Image-to-Video synthesis with AnimateAnyone and OpenVINO ======================================================== -|image0| +.. 
image:: https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/animate-anyone/animate-anyone.gif + `AnimateAnyone `__ tackles the task of generating animation sequences from a single character image. It @@ -36,9 +37,7 @@ repo `__ and .. warning:: - - This tutorial requires at least **96 GB** of RAM for model conversion and **40 GB** for inference. Changing the values ``HEIGHT``, ``WIDTH`` and ``VIDEO_LENGTH`` variables will change the memory consumption but will also affect accuracy. - + This tutorial requires at least **96 GB** of RAM for model conversion and **40 GB** for inference. Changing the values of ``HEIGHT`` ``WIDTH`` and ``VIDEO_LENGTH`` variables will change the memory consumption but will also affect accuracy. **Table of contents:** @@ -71,9 +70,6 @@ need a Jupyter server to start. For details, please refer to `Installation Guide `__. -.. |image0| image:: https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/animate-anyone/animate-anyone.gif - - Prerequisites ------------- @@ -104,12 +100,6 @@ Prerequisites %load_ext skip_kernel_extension - -.. parsed-literal:: - - Note: you may need to restart the kernel to use updated packages. - - Note that we clone a fork of original repo with tweaked forward methods. .. code:: ipython3 @@ -164,11 +154,11 @@ Note that we clone a fork of original repo with tweaked forward methods. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead. + /home/itrushkin/.virtualenvs/test/lib/python3.10/site-packages/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead. + /home/itrushkin/.virtualenvs/test/lib/python3.10/site-packages/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead. + /home/itrushkin/.virtualenvs/test/lib/python3.10/site-packages/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( @@ -192,7 +182,7 @@ Note that we clone a fork of original repo with tweaked forward methods. .. parsed-literal:: - INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino + INFO:nncf:NNCF initialized successfully. 
Supported frameworks detected: torch, openvino Prepare base model @@ -216,13 +206,6 @@ Prepare base model local_dir=local_dir, ) - - -.. parsed-literal:: - - diffusion_pytorch_model.bin: 0%| | 0.00/3.44G [00:00:2: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. - :6: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. - :9: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. - - Convert model to OpenVINO IR ---------------------------- @@ -430,7 +324,7 @@ semantic features are extracted through the CLIP image encoder for Cross-Attention. Temporal-Attention operates in the temporal dimension. Finally, the VAE decoder decodes the result into a video clip. -|image01| +.. image:: https://humanaigc.github.io/animate-anyone/static/images/f2_img.png The pipeline contains 6 PyTorch modules: @@ -470,8 +364,6 @@ compression parameters. More details about weights compression can be found in `OpenVINO documentation `__. -.. |image01| image:: https://humanaigc.github.io/animate-anyone/static/images/f2_img.png - .. 
code:: ipython3 %%skip not $SHOULD_CONVERT @@ -529,12 +421,14 @@ of the pipeline, it will be better to convert them to separate models. .. parsed-literal:: + WARNING:nncf:NNCF provides best results with torch==2.1.2, while current torch version is 2.2.2+cpu. If you encounter issues, consider switching to torch==2.1.2 INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 100% (32 / 32) │ 100% (32 / 32) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + +--------------+---------------------------+-----------------------------------+ + | Num bits (N) | % all parameters (layers) | % ratio-defining parameters | + | | | (layers) | + +==============+===========================+===================================+ + | 8 | 100% (32 / 32) | 100% (32 / 32) | + +--------------+---------------------------+-----------------------------------+ @@ -550,6 +444,14 @@ of the pipeline, it will be better to convert them to separate models. + + + + + + + + .. code:: ipython3 %%skip not $SHOULD_CONVERT @@ -575,11 +477,12 @@ of the pipeline, it will be better to convert them to separate models. .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 100% (40 / 40) │ 100% (40 / 40) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + +--------------+---------------------------+-----------------------------------+ + | Num bits (N) | % all parameters (layers) | % ratio-defining parameters | + | | | (layers) | + +==============+===========================+===================================+ + | 8 | 100% (40 / 40) | 100% (40 / 40) | + +--------------+---------------------------+-----------------------------------+ @@ -595,6 +498,14 @@ of the pipeline, it will be better to convert them to separate models. + + + + + + + + Reference UNet ~~~~~~~~~~~~~~ @@ -641,11 +552,12 @@ step. .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 100% (270 / 270) │ 100% (270 / 270) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + +--------------+---------------------------+-----------------------------------+ + | Num bits (N) | % all parameters (layers) | % ratio-defining parameters | + | | | (layers) | + +==============+===========================+===================================+ + | 8 | 100% (270 / 270) | 100% (270 / 270) | + +--------------+---------------------------+-----------------------------------+ @@ -661,6 +573,14 @@ step. + + + + + + + + Denoising UNet ~~~~~~~~~~~~~~ @@ -734,11 +654,12 @@ step. .. 
parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 100% (534 / 534) │ 100% (534 / 534) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + +--------------+---------------------------+-----------------------------------+ + | Num bits (N) | % all parameters (layers) | % ratio-defining parameters | + | | | (layers) | + +==============+===========================+===================================+ + | 8 | 100% (534 / 534) | 100% (534 / 534) | + +--------------+---------------------------+-----------------------------------+ @@ -754,6 +675,14 @@ step. + + + + + + + + Pose Guider ~~~~~~~~~~~ @@ -780,11 +709,12 @@ efficiently integrate pose control signals into the denoising process. .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 100% (8 / 8) │ 100% (8 / 8) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + +--------------+---------------------------+-----------------------------------+ + | Num bits (N) | % all parameters (layers) | % ratio-defining parameters | + | | | (layers) | + +==============+===========================+===================================+ + | 8 | 100% (8 / 8) | 100% (8 / 8) | + +--------------+---------------------------+-----------------------------------+ @@ -800,6 +730,14 @@ efficiently integrate pose control signals into the denoising process. + + + + + + + + Image Encoder ~~~~~~~~~~~~~ @@ -825,18 +763,19 @@ required for both reference and denoising UNets. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4779: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /home/itrushkin/.virtualenvs/test/lib/python3.10/site-packages/transformers/modeling_utils.py:4225: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( .. 
parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 100% (146 / 146) │ 100% (146 / 146) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + +--------------+---------------------------+-----------------------------------+ + | Num bits (N) | % all parameters (layers) | % ratio-defining parameters | + | | | (layers) | + +==============+===========================+===================================+ + | 8 | 100% (146 / 146) | 100% (146 / 146) | + +--------------+---------------------------+-----------------------------------+ @@ -852,6 +791,14 @@ required for both reference and denoising UNets. + + + + + + + + Inference --------- @@ -877,6 +824,15 @@ For starting work, please select inference device from dropdown list. device = device_widget() + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=5, options=('CPU', 'GPU.0', 'GPU.1', 'GPU.2', 'GPU.3', 'AUTO'), value='A… + + + .. code:: ipython3 class OVPose2VideoPipeline(Pose2VideoPipeline): @@ -1174,7 +1130,7 @@ Video post-processing .. raw:: html @@ -1248,23 +1204,9 @@ Interactive inference demo = make_demo(fn=generate) try: - demo.queue().launch(debug=False) + demo.queue().launch(debug=True) except Exception: - demo.queue().launch(debug=False, share=True) + demo.queue().launch(debug=True, share=True) # if you are launching remotely, specify server_name and server_port # demo.launch(server_name='your server name', server_port='server port in int') # Read more in the docs: https://gradio.app/docs/" - - -.. parsed-literal:: - - Running on local URL: http://127.0.0.1:7860 - - To create a public link, set `share=True` in `launch()`. - - - - - - - diff --git a/docs/notebooks/async-api-with-output.rst b/docs/notebooks/async-api-with-output.rst index 135554acc38de4..e8f5b80d429d81 100644 --- a/docs/notebooks/async-api-with-output.rst +++ b/docs/notebooks/async-api-with-output.rst @@ -58,21 +58,14 @@ Imports .. code:: ipython3 - import platform - %pip install -q "openvino>=2023.1.0" - %pip install -q opencv-python - if platform.system() != "windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q opencv-python "matplotlib>=3.4" .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. .. code:: ipython3 @@ -355,7 +348,7 @@ Test performance in Sync Mode .. parsed-literal:: Source ended - average throuput in sync mode: 63.95 fps + average throuput in sync mode: 63.79 fps Async Mode @@ -494,7 +487,7 @@ Test the performance in Async Mode .. parsed-literal:: Source ended - average throuput in async mode: 108.46 fps + average throuput in async mode: 106.83 fps Compare the performance @@ -637,5 +630,5 @@ Test the performance with ``AsyncInferQueue`` .. 
parsed-literal:: - average throughput in async mode with async infer queue: 144.01 fps + average throughput in async mode with async infer queue: 145.36 fps diff --git a/docs/notebooks/async-api-with-output_files/async-api-with-output_23_0.png b/docs/notebooks/async-api-with-output_files/async-api-with-output_23_0.png index f80b64476e19ea..f3492582efc67f 100644 --- a/docs/notebooks/async-api-with-output_files/async-api-with-output_23_0.png +++ b/docs/notebooks/async-api-with-output_files/async-api-with-output_23_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b164e09df4e90dc87d63caf35cc832021fbd147354a5300605164fce212e36b8 -size 29453 +oid sha256:84a9b6aab4a04feb319b3243644da6837b3b894122657a8f6639fa604e3b48dd +size 29468 diff --git a/docs/notebooks/auto-device-with-output.rst b/docs/notebooks/auto-device-with-output.rst index 5f7d8dfc61502f..28d894eb72b22e 100644 --- a/docs/notebooks/auto-device-with-output.rst +++ b/docs/notebooks/auto-device-with-output.rst @@ -82,18 +82,15 @@ Import modules and create Core import platform # Install required packages - %pip install -q "openvino>=2023.1.0" "numpy<2" Pillow torch torchvision tqdm --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "openvino>=2023.1.0" "matplotlib>=3.4" Pillow torch torchvision tqdm --extra-index-url https://download.pytorch.org/whl/cpu - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + if platform.system() == "Darwin": + %pip install -q "numpy<2.0.0" .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. .. code:: ipython3 @@ -200,16 +197,16 @@ By default, ``compile_model`` API will select **AUTO** as .. parsed-literal:: - [23:32:04.6480]I[plugin.cpp:421][AUTO] device:CPU, config:LOG_LEVEL=LOG_INFO - [23:32:04.6480]I[plugin.cpp:421][AUTO] device:CPU, config:PERFORMANCE_HINT=LATENCY - [23:32:04.6480]I[plugin.cpp:421][AUTO] device:CPU, config:PERFORMANCE_HINT_NUM_REQUESTS=0 - [23:32:04.6480]I[plugin.cpp:421][AUTO] device:CPU, config:PERF_COUNT=NO - [23:32:04.6480]I[plugin.cpp:426][AUTO] device:CPU, priority:0 - [23:32:04.6481]I[schedule.cpp:17][AUTO] scheduler starting - [23:32:04.6481]I[auto_schedule.cpp:181][AUTO] select device:CPU - [23:32:04.7787]I[auto_schedule.cpp:346][AUTO] Device: [CPU]: Compile model took 130.622171 ms - [23:32:04.7789]I[auto_schedule.cpp:112][AUTO] device:CPU compiling model finished - [23:32:04.7790]I[plugin.cpp:454][AUTO] underlying hardware does not support hardware context + [22:37:15.4888]I[plugin.cpp:421][AUTO] device:CPU, config:LOG_LEVEL=LOG_INFO + [22:37:15.4888]I[plugin.cpp:421][AUTO] device:CPU, config:PERFORMANCE_HINT=LATENCY + [22:37:15.4888]I[plugin.cpp:421][AUTO] device:CPU, config:PERFORMANCE_HINT_NUM_REQUESTS=0 + [22:37:15.4888]I[plugin.cpp:421][AUTO] device:CPU, config:PERF_COUNT=NO + [22:37:15.4888]I[plugin.cpp:426][AUTO] device:CPU, priority:0 + [22:37:15.4888]I[schedule.cpp:17][AUTO] scheduler starting + [22:37:15.4888]I[auto_schedule.cpp:181][AUTO] select device:CPU + [22:37:15.5995]I[auto_schedule.cpp:346][AUTO] Device: [CPU]: Compile model took 110.638049 ms + [22:37:15.5996]I[auto_schedule.cpp:112][AUTO] device:CPU compiling model finished + [22:37:15.5997]I[plugin.cpp:454][AUTO] underlying hardware does not support hardware context Successfully compiled model without a device_name. 
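The AUTO log above is what the plugin prints when a model is compiled without an explicit ``device_name``. A minimal sketch of that call sequence, assuming the standard ``openvino`` Python API (the IR path below is a placeholder, not a file shipped with this notebook):

.. code:: ipython3

    import openvino as ov

    core = ov.Core()
    # Ask the AUTO plugin to log its device selection, matching the LOG_LEVEL=LOG_INFO entries above.
    core.set_property("AUTO", {"LOG_LEVEL": "LOG_INFO"})

    model = core.read_model("model.xml")  # placeholder IR path
    # No device_name is passed, so AUTO selects the device (CPU in the log above).
    compiled_model = core.compile_model(model)
    print("Successfully compiled model without a device_name.")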
@@ -222,8 +219,8 @@ By default, ``compile_model`` API will select **AUTO** as .. parsed-literal:: - Deleted compiled_model - [23:32:04.7847]I[schedule.cpp:308][AUTO] scheduler ending + Deleted compiled_model[22:37:15.6060]I[schedule.cpp:308][AUTO] scheduler ending + Explicitly pass AUTO as device_name to Core::compile_model API @@ -381,7 +378,7 @@ executed on CPU until GPU is ready. .. parsed-literal:: - Time to load model using AUTO device and get first inference: 0.13 seconds. + Time to load model using AUTO device and get first inference: 0.15 seconds. .. code:: ipython3 @@ -556,12 +553,12 @@ Loop for inference and update the FPS/Latency every Compiling Model for AUTO device with THROUGHPUT hint Start inference, 6 groups of FPS/latency will be measured over 10s intervals - throughput: 183.87fps, latency: 31.26ms, time interval: 10.01s - throughput: 184.60fps, latency: 31.70ms, time interval: 10.00s - throughput: 183.24fps, latency: 31.93ms, time interval: 10.01s - throughput: 184.05fps, latency: 31.75ms, time interval: 10.00s - throughput: 184.40fps, latency: 31.77ms, time interval: 10.00s - throughput: 178.41fps, latency: 32.83ms, time interval: 10.02s + throughput: 184.25fps, latency: 31.12ms, time interval: 10.02s + throughput: 184.19fps, latency: 31.80ms, time interval: 10.00s + throughput: 183.00fps, latency: 32.00ms, time interval: 10.01s + throughput: 183.37fps, latency: 31.91ms, time interval: 10.01s + throughput: 178.30fps, latency: 32.90ms, time interval: 10.01s + throughput: 182.80fps, latency: 32.08ms, time interval: 10.01s Done @@ -607,12 +604,12 @@ Loop for inference and update the FPS/Latency for each Compiling Model for AUTO Device with LATENCY hint Start inference, 6 groups fps/latency will be out with 10s interval - throughput: 140.52fps, latency: 6.62ms, time interval: 10.01s - throughput: 142.84fps, latency: 6.60ms, time interval: 10.00s - throughput: 142.14fps, latency: 6.60ms, time interval: 10.00s - throughput: 142.63fps, latency: 6.60ms, time interval: 10.00s - throughput: 143.11fps, latency: 6.61ms, time interval: 10.01s - throughput: 132.99fps, latency: 7.13ms, time interval: 10.01s + throughput: 139.34fps, latency: 6.64ms, time interval: 10.00s + throughput: 141.45fps, latency: 6.63ms, time interval: 10.00s + throughput: 141.42fps, latency: 6.63ms, time interval: 10.01s + throughput: 141.70fps, latency: 6.62ms, time interval: 10.01s + throughput: 130.57fps, latency: 7.22ms, time interval: 10.00s + throughput: 141.61fps, latency: 6.62ms, time interval: 10.01s Done diff --git a/docs/notebooks/auto-device-with-output_files/auto-device-with-output_27_0.png b/docs/notebooks/auto-device-with-output_files/auto-device-with-output_27_0.png index 5ef6531526d989..af14e6c0ac24c7 100644 --- a/docs/notebooks/auto-device-with-output_files/auto-device-with-output_27_0.png +++ b/docs/notebooks/auto-device-with-output_files/auto-device-with-output_27_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4cf9ec5f2d8e34510d31d10190c4d7f269bb83a2800891ff865e09ee85e80d95 -size 27103 +oid sha256:05bc2541cede3c32f2f9acff3f93d19572aab80a5bb7ac3e6c77bb397817bb54 +size 25899 diff --git a/docs/notebooks/auto-device-with-output_files/auto-device-with-output_28_0.png b/docs/notebooks/auto-device-with-output_files/auto-device-with-output_28_0.png index edc7de70aeb565..e12bb8a48eaa0e 100644 --- a/docs/notebooks/auto-device-with-output_files/auto-device-with-output_28_0.png +++ b/docs/notebooks/auto-device-with-output_files/auto-device-with-output_28_0.png @@ -1,3 +1,3 
@@ version https://git-lfs.github.com/spec/v1 -oid sha256:ed265008638a6c38fbfc8397e6bb20d0e1fc900df893f30b7238c518d2380b2b -size 40093 +oid sha256:b1e2e56c28c18b5d31607518b846e78c58e76b2b59a9ee083d02e0d8f8ec2b52 +size 40077 diff --git a/docs/notebooks/clip-zero-shot-classification-with-output.rst b/docs/notebooks/clip-zero-shot-classification-with-output.rst index 2e5a45826eaceb..3da831e6d9d0dd 100644 --- a/docs/notebooks/clip-zero-shot-classification-with-output.rst +++ b/docs/notebooks/clip-zero-shot-classification-with-output.rst @@ -112,14 +112,7 @@ tokenizer and preparing the images. .. code:: ipython3 - import platform - - %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "gradio>=4.19" "openvino>=2023.1.0" "transformers[torch]>=4.30" "datasets" "nncf>=2.6.0" "torch>=2.1" Pillow - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "gradio>=4.19" "matplotlib>=3.4" "openvino>=2023.1.0" "transformers[torch]>=4.30" "datasets" "nncf>=2.6.0" "torch>=2.1" Pillow import requests @@ -736,6 +729,7 @@ up of the dynamic quantized models. Interactive demo ---------------- + Now, it is your turn! You can provide your own image and comma-separated list of labels for zero-shot classification. diff --git a/docs/notebooks/controlnet-stable-diffusion-with-output.rst b/docs/notebooks/controlnet-stable-diffusion-with-output.rst index a922fe445a7897..3ab43d897ea658 100644 --- a/docs/notebooks/controlnet-stable-diffusion-with-output.rst +++ b/docs/notebooks/controlnet-stable-diffusion-with-output.rst @@ -198,7 +198,7 @@ Prerequisites .. code:: ipython3 %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "torch>=2.1" "torchvision" - %pip install -q "diffusers>=0.14.0" "transformers>=4.30.2" "controlnet-aux>=0.0.6" "gradio>=3.36" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "diffusers>=0.14.0" "matplotlib>=3.4" "transformers>=4.30.2" "controlnet-aux>=0.0.6" "gradio>=3.36" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q "openvino>=2023.1.0" "datasets>=2.14.6" "nncf>=2.7.0" import requests diff --git a/docs/notebooks/convert-to-openvino-with-output.rst b/docs/notebooks/convert-to-openvino-with-output.rst index e5a66d4f74ae17..8ec32adef0e04a 100644 --- a/docs/notebooks/convert-to-openvino-with-output.rst +++ b/docs/notebooks/convert-to-openvino-with-output.rst @@ -39,15 +39,12 @@ Guide =2024.0.0" "requests" "tqdm" "transformers>=4.31" "onnx<1.16.2" "torch>=2.1" "torchvision" "tensorflow_hub" "tensorflow" + "openvino>=2024.4.0" "requests" "tqdm" "transformers>=4.31" "onnx!=1.16.2" "torch>=2.1" "torchvision" "tensorflow_hub" "tensorflow" .. parsed-literal:: - Requirement already satisfied: pip in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (24.2) - Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. @@ -192,11 +189,11 @@ NLP model from Hugging Face and export it in ONNX format: .. parsed-literal:: - 2024-10-07 23:35:08.862114: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
- 2024-10-07 23:35:08.907564: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-22 22:40:21.522113: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-22 22:40:21.555890: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-07 23:35:09.444317: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/distilbert/modeling_distilbert.py:215: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + 2024-10-22 22:40:22.084160: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/distilbert/modeling_distilbert.py:215: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. mask, torch.tensor(torch.finfo(scores.dtype).min) @@ -673,7 +670,7 @@ frameworks conversion guides. .. parsed-literal:: - 2024-10-07 23:35:26.468759: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform. + 2024-10-22 22:40:40.138322: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform. Skipping registering GPU devices... @@ -724,28 +721,12 @@ Resnet50 model that was exported to the ONNX format: prep.input("input.1").model().set_layout(ov.Layout("nchw")) ov_model = prep.build() -.. code:: ipython3 +.. code:: python - # Legacy Model Optimizer API - from openvino.tools import mo - - ov_model = mo.convert_model(ONNX_CV_MODEL_PATH, layout="nchw") - - -.. parsed-literal:: - - [ INFO ] MO command line tool is considered as the legacy conversion API as of OpenVINO 2023.2 release. - In 2025.0 MO command line tool and openvino.tools.mo.convert_model() will be removed. 
Please use OpenVINO Model Converter (OVC) or openvino.convert_model(). OVC represents a lightweight alternative of MO and provides simplified model conversion API. - Find more information about transition from MO to OVC at https://docs.openvino.ai/2023.2/openvino_docs_OV_Converter_UG_prepare_model_convert_model_MO_OVC_transition.html - - -.. parsed-literal:: - - huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... - To disable this warning, you can either: - - Avoid using `tokenizers` before the fork if possible - - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) + # Legacy Model Optimizer API + from openvino.tools import mo + ov_model = mo.convert_model(ONNX_CV_MODEL_PATH, layout="nchw") Changing Model Layout ^^^^^^^^^^^^^^^^^^^^^ @@ -772,26 +753,17 @@ and the layout of an original model: prep.input("input.1").model().set_layout(ov.Layout("nchw")) ov_model = prep.build() -.. code:: ipython3 - - # Legacy Model Optimizer API - from openvino.tools import mo - - ov_model = mo.convert_model(ONNX_CV_MODEL_PATH, layout="nchw->nhwc") - - # alternatively use source_layout and target_layout parameters - ov_model = mo.convert_model(ONNX_CV_MODEL_PATH, source_layout="nchw", target_layout="nhwc") +Legacy Model Optimizer API +========================== +.. code:: python -.. parsed-literal:: + from openvino.tools import mo - [ INFO ] MO command line tool is considered as the legacy conversion API as of OpenVINO 2023.2 release. - In 2025.0 MO command line tool and openvino.tools.mo.convert_model() will be removed. Please use OpenVINO Model Converter (OVC) or openvino.convert_model(). OVC represents a lightweight alternative of MO and provides simplified model conversion API. - Find more information about transition from MO to OVC at https://docs.openvino.ai/2023.2/openvino_docs_OV_Converter_UG_prepare_model_convert_model_MO_OVC_transition.html - [ INFO ] MO command line tool is considered as the legacy conversion API as of OpenVINO 2023.2 release. - In 2025.0 MO command line tool and openvino.tools.mo.convert_model() will be removed. Please use OpenVINO Model Converter (OVC) or openvino.convert_model(). OVC represents a lightweight alternative of MO and provides simplified model conversion API. - Find more information about transition from MO to OVC at https://docs.openvino.ai/2023.2/openvino_docs_OV_Converter_UG_prepare_model_convert_model_MO_OVC_transition.html + ov_model = mo.convert_model(ONNX_CV_MODEL_PATH, layout="nchw->nhwc") + # alternatively use source_layout and target_layout parameters + ov_model = mo.convert_model(ONNX_CV_MODEL_PATH, source_layout="nchw", target_layout="nhwc") Specifying Mean and Scale Values ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -819,25 +791,18 @@ more examples. ov_model = prep.build() -.. code:: ipython3 - - # Legacy Model Optimizer API - from openvino.tools import mo - - - ov_model = mo.convert_model( - ONNX_CV_MODEL_PATH, - mean_values=[255 * x for x in [0.485, 0.456, 0.406]], - scale_values=[255 * x for x in [0.229, 0.224, 0.225]], - ) +.. code:: python + # Legacy Model Optimizer API -.. parsed-literal:: + from openvino.tools import mo - [ INFO ] MO command line tool is considered as the legacy conversion API as of OpenVINO 2023.2 release. - In 2025.0 MO command line tool and openvino.tools.mo.convert_model() will be removed. Please use OpenVINO Model Converter (OVC) or openvino.convert_model(). 
OVC represents a lightweight alternative of MO and provides simplified model conversion API. - Find more information about transition from MO to OVC at https://docs.openvino.ai/2023.2/openvino_docs_OV_Converter_UG_prepare_model_convert_model_MO_OVC_transition.html + ov_model = mo.convert_model( + ONNX_CV_MODEL_PATH, + mean_values=[255 * x for x in [0.485, 0.456, 0.406]], + scale_values=[255 * x for x in [0.229, 0.224, 0.225]], + ) Reversing Input Channels ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -862,20 +827,12 @@ the color channels before inference. prep.input("input.1").preprocess().reverse_channels() ov_model = prep.build() -.. code:: ipython3 - - # Legacy Model Optimizer API - from openvino.tools import mo - - ov_model = mo.convert_model(ONNX_CV_MODEL_PATH, reverse_input_channels=True) - - -.. parsed-literal:: +.. code:: python - [ INFO ] MO command line tool is considered as the legacy conversion API as of OpenVINO 2023.2 release. - In 2025.0 MO command line tool and openvino.tools.mo.convert_model() will be removed. Please use OpenVINO Model Converter (OVC) or openvino.convert_model(). OVC represents a lightweight alternative of MO and provides simplified model conversion API. - Find more information about transition from MO to OVC at https://docs.openvino.ai/2023.2/openvino_docs_OV_Converter_UG_prepare_model_convert_model_MO_OVC_transition.html + # Legacy Model Optimizer API + from openvino.tools import mo + ov_model = mo.convert_model(ONNX_CV_MODEL_PATH, reverse_input_channels=True) Cutting Off Parts of a Model ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/notebooks/convnext-classification-with-output.rst b/docs/notebooks/convnext-classification-with-output.rst index 99bc625a13c257..1204ea2c17f106 100644 --- a/docs/notebooks/convnext-classification-with-output.rst +++ b/docs/notebooks/convnext-classification-with-output.rst @@ -192,7 +192,7 @@ And print results Predicted Class: 281 Predicted Label: n02123045 tabby, tabby cat - Predicted Probability: 0.4793865978717804 + Predicted Probability: 0.5351971983909607 Convert the model to OpenVINO Intermediate representation format diff --git a/docs/notebooks/cross-lingual-books-alignment-with-output.rst b/docs/notebooks/cross-lingual-books-alignment-with-output.rst index 87fad52e92709a..b116f0e1f5cda1 100644 --- a/docs/notebooks/cross-lingual-books-alignment-with-output.rst +++ b/docs/notebooks/cross-lingual-books-alignment-with-output.rst @@ -69,14 +69,7 @@ Guide =3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" - - %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu requests pysbd transformers "torch>=2.1" "openvino>=2023.1.0" seaborn ipywidgets + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu requests pysbd transformers "torch>=2.1" "openvino>=2023.1.0" seaborn ipywidgets "matplotlib>=3.4" Get Books --------- diff --git a/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst b/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst index f60ccb2fadd9e6..6f003fb71d75fb 100644 --- a/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst +++ b/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst @@ -98,20 +98,12 @@ Guide =2023.3.0" "monai>=0.9.1" "torchmetrics>=0.11.0" "nncf>=2.8.0" "opencv-python" torch tqdm --extra-index-url https://download.pytorch.org/whl/cpu - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "openvino>=2023.3.0" "monai>=0.9.1" "torchmetrics>=0.11.0" "nncf>=2.8.0" 
"opencv-python" "matplotlib>=3.4" torch tqdm --extra-index-url https://download.pytorch.org/whl/cpu .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. Imports @@ -162,10 +154,10 @@ Imports .. parsed-literal:: - 2024-10-07 23:35:52.753512: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-07 23:35:52.788105: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-22 22:41:03.560708: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-22 22:41:03.596278: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-07 23:35:53.378916: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-22 22:41:04.191138: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -231,7 +223,7 @@ notebook `__. .. parsed-literal:: - /tmp/ipykernel_72009/1592321960.py:3: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /tmp/ipykernel_2583350/1592321960.py:3: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. 
Please open an issue on GitHub for any issues related to this experimental feature. state_dict = torch.load(state_dict_file, map_location=torch.device("cpu")) @@ -452,7 +444,7 @@ this notebook. .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if x_e.shape[-i - 1] != x_0.shape[-i - 1]: @@ -534,18 +526,18 @@ Convert quantized model to OpenVINO IR model and save it. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:340: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:340: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! return self._level_low.item() - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:348: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:348: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
return self._level_high.item() - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if x_e.shape[-i - 1] != x_0.shape[-i - 1]: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: Tensor-likes are not close! - Mismatched elements: 249913 / 262144 (95.3%) - Greatest absolute difference: 4.628173828125 at index (0, 0, 430, 337) (up to 1e-05 allowed) - Greatest relative difference: 31968.152067381572 at index (0, 0, 102, 269) (up to 1e-05 allowed) + Mismatched elements: 249255 / 262144 (95.1%) + Greatest absolute difference: 3.8265769481658936 at index (0, 0, 126, 353) (up to 1e-05 allowed) + Greatest relative difference: 18364.59337143498 at index (0, 0, 305, 451) (up to 1e-05 allowed) _check_trace( @@ -671,7 +663,7 @@ be run in the notebook with ``! benchmark_app`` or [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.LATENCY. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 8.78 ms + [ INFO ] Read model took 8.83 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [1,1,512,512] @@ -685,7 +677,7 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.final_conv/aten::_convolution/Add) : f32 / [...] / [1,1,512,512] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 236.15 ms + [ INFO ] Compile model took 250.74 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -722,17 +714,17 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference synchronously, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 48.84 ms + [ INFO ] First inference took 51.21 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 429 iterations - [ INFO ] Duration: 15015.79 ms + [ INFO ] Count: 392 iterations + [ INFO ] Duration: 15027.48 ms [ INFO ] Latency: - [ INFO ] Median: 34.71 ms - [ INFO ] Average: 34.77 ms - [ INFO ] Min: 34.38 ms - [ INFO ] Max: 37.16 ms - [ INFO ] Throughput: 28.57 FPS + [ INFO ] Median: 34.96 ms + [ INFO ] Average: 38.10 ms + [ INFO ] Min: 34.49 ms + [ INFO ] Max: 48.05 ms + [ INFO ] Throughput: 26.09 FPS .. code:: ipython3 @@ -758,7 +750,7 @@ be run in the notebook with ``! benchmark_app`` or [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.LATENCY. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 10.92 ms + [ INFO ] Read model took 11.01 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [1,1,512,512] @@ -772,7 +764,7 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.final_conv/aten::_convolution/Add) : f32 / [...] / [1,1,512,512] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 239.20 ms + [ INFO ] Compile model took 238.95 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model49 @@ -809,17 +801,17 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference synchronously, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 29.32 ms + [ INFO ] First inference took 30.03 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 883 iterations - [ INFO ] Duration: 15004.05 ms + [ INFO ] Count: 952 iterations + [ INFO ] Duration: 15015.31 ms [ INFO ] Latency: - [ INFO ] Median: 15.57 ms - [ INFO ] Average: 16.79 ms - [ INFO ] Min: 15.15 ms - [ INFO ] Max: 22.01 ms - [ INFO ] Throughput: 58.85 FPS + [ INFO ] Median: 15.55 ms + [ INFO ] Average: 15.57 ms + [ INFO ] Min: 15.23 ms + [ INFO ] Max: 17.26 ms + [ INFO ] Throughput: 63.40 FPS Visually Compare Inference Results @@ -913,7 +905,7 @@ seed is displayed to enable reproducing specific runs of this cell. .. parsed-literal:: - Visualizing results with seed 1728337035 + Visualizing results with seed 1729629747 @@ -996,8 +988,8 @@ performs inference, and displays the results on the frames loaded in .. parsed-literal:: - Loaded model to AUTO in 0.15 seconds. - Total time for 68 frames: 2.32 seconds, fps:29.73 + Loaded model to AUTO in 0.18 seconds. 
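As a rough cross-check of the ``benchmark_app`` reports above, the FP32 and quantized IRs can also be timed directly from Python with synchronous requests. This is only a sketch, not part of the original notebook: ``fp32_ir_path.xml`` and ``int8_ir_path.xml`` are placeholders for the IR files saved earlier, and the absolute numbers will not match the ``benchmark_app`` output exactly.

.. code:: python

    import time

    import numpy as np
    import openvino as ov

    core = ov.Core()


    def mean_latency_ms(ir_path, n_iter=100):
        # Compile the IR for CPU and time synchronous inference on random data
        # shaped like the model input reported above ([1, 1, 512, 512], f32).
        compiled = core.compile_model(ir_path, "CPU")
        dummy = np.random.rand(1, 1, 512, 512).astype(np.float32)
        compiled(dummy)  # warm-up run, excluded from timing
        start = time.perf_counter()
        for _ in range(n_iter):
            compiled(dummy)
        return (time.perf_counter() - start) / n_iter * 1000


    # Placeholder paths: point these at the FP32 and INT8 IRs saved earlier.
    for name, ir_path in [("FP32", "fp32_ir_path.xml"), ("INT8", "int8_ir_path.xml")]:
        print(f"{name}: {mean_latency_ms(ir_path):.2f} ms per inference")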
+ Total time for 68 frames: 2.32 seconds, fps:29.79 References @@ -1010,7 +1002,7 @@ Repository `__ - `Neural Network Compression Framework for fast model inference `__ - `OpenVINO API Tutorial `__ - `OpenVINO PyPI (pip -install openvino-dev) `__ +install openvino) `__ **Kits19 Data** - `Kits19 Challenge Homepage `__ - `Kits19 GitHub diff --git a/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png b/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png index 03b5eb1e3fd9f0..f7c163c1c77604 100644 --- a/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png +++ b/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1aac55db34be1df744fd19868762a8b4572a8e19683af72a57d7176b1486af0c -size 380239 +oid sha256:5d329fc21292f69aeee8164a3f805f2e8e61369c42eab565ebafe555cf6d1a1c +size 381131 diff --git a/docs/notebooks/ddcolor-image-colorization-with-output.rst b/docs/notebooks/ddcolor-image-colorization-with-output.rst index ccd6216d26268d..b2a76fd1a0ded8 100644 --- a/docs/notebooks/ddcolor-image-colorization-with-output.rst +++ b/docs/notebooks/ddcolor-image-colorization-with-output.rst @@ -25,9 +25,8 @@ In this tutorial we consider how to convert and run DDColor using OpenVINO. Additionally, we will demonstrate how to optimize this model using `NNCF `__. -🪄 Let’s start to explore magic of image colorization! - -**Table of contents:** +🪄 Let’s start to explore magic of image colorization! #### Table of +contents: - `Prerequisites <#prerequisites>`__ - `Load PyTorch model <#load-pytorch-model>`__ @@ -68,7 +67,7 @@ Prerequisites .. code:: ipython3 import platform - + %pip install -q "nncf>=2.11.0" "torch>=2.1" "torchvision" "timm" "opencv_python" "pillow" "PyYAML" "scipy" "scikit-image" "datasets" "gradio>=4.19" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -Uq "openvino>=2024.3.0" if platform.python_version_tuple()[1] in ["8", "9"]: @@ -89,14 +88,14 @@ Prerequisites import sys from pathlib import Path import requests - + repo_dir = Path("DDColor") - + if not repo_dir.exists(): !git clone https://github.com/piddnad/DDColor.git - + sys.path.append(str(repo_dir)) - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) @@ -110,7 +109,7 @@ Prerequisites remote: Counting objects: 100% (76/76), done. remote: Compressing objects: 100% (42/42), done. remote: Total 233 (delta 54), reused 34 (delta 34), pack-reused 157 (from 1) - Receiving objects: 100% (233/233), 13.34 MiB | 17.27 MiB/s, done. + Receiving objects: 100% (233/233), 13.34 MiB | 641.00 KiB/s, done. Resolving deltas: 100% (80/80), done. @@ -129,6 +128,13 @@ Prerequisites except Exception: from inference.colorization_pipeline_hf import DDColorHF, ImageColorizationPipelineHF + +.. 
parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers + warnings.warn(f"Importing from {__name__} is deprecated, please import via timm.layers", FutureWarning) + + Load PyTorch model ------------------ @@ -143,14 +149,14 @@ models from DDColor family. .. code:: ipython3 import torch - + model_name = "ddcolor_paper_tiny" - + ddcolor_model = DDColorHF.from_pretrained(f"piddnad/{model_name}") - - + + colorizer = ImageColorizationPipelineHF(model=ddcolor_model, input_size=512) - + ddcolor_model.to("cpu") colorizer.device = torch.device("cpu") @@ -163,12 +169,12 @@ Run PyTorch model inference import cv2 import PIL - + IMG_PATH = "DDColor/assets/test_images/Ansel Adams _ Moore Photography.jpeg" - - + + img = cv2.imread(IMG_PATH) - + PIL.Image.fromarray(img[:, :, ::-1]) @@ -207,9 +213,9 @@ loading on device using ``core.complie_model``. import openvino as ov import torch - + OV_COLORIZER_PATH = Path("ddcolor.xml") - + if not OV_COLORIZER_PATH.exists(): ov_model = ov.convert_model(ddcolor_model, example_input=torch.ones((1, 3, 512, 512)), input=[1, 3, 512, 512]) ov.save_model(ov_model, OV_COLORIZER_PATH) @@ -224,11 +230,11 @@ Select one of supported devices for inference using dropdown list. .. code:: ipython3 from notebook_utils import device_widget - + core = ov.Core() - + device = device_widget() - + device @@ -250,36 +256,36 @@ Select one of supported devices for inference using dropdown list. import numpy as np import torch import torch.nn.functional as F - - + + def process(img, compiled_model): # Preprocess input image height, width = img.shape[:2] - + # Normalize to [0, 1] range img = (img / 255.0).astype(np.float32) orig_l = cv2.cvtColor(img, cv2.COLOR_BGR2Lab)[:, :, :1] # (h, w, 1) - + # Resize rgb image -> lab -> get grey -> rgb img = cv2.resize(img, (512, 512)) img_l = cv2.cvtColor(img, cv2.COLOR_BGR2Lab)[:, :, :1] img_gray_lab = np.concatenate((img_l, np.zeros_like(img_l), np.zeros_like(img_l)), axis=-1) img_gray_rgb = cv2.cvtColor(img_gray_lab, cv2.COLOR_LAB2RGB) - + # Transpose HWC -> CHW and add batch dimension tensor_gray_rgb = torch.from_numpy(img_gray_rgb.transpose((2, 0, 1))).float().unsqueeze(0) - + # Run model inference output_ab = compiled_model(tensor_gray_rgb)[0] - + # Postprocess result # resize ab -> concat original l -> rgb output_ab_resize = F.interpolate(torch.from_numpy(output_ab), size=(height, width))[0].float().numpy().transpose(1, 2, 0) output_lab = np.concatenate((orig_l, output_ab_resize), axis=-1) output_bgr = cv2.cvtColor(output_lab, cv2.COLOR_LAB2BGR) - + output_img = (output_bgr * 255.0).round().astype(np.uint8) - + return output_img .. code:: ipython3 @@ -318,7 +324,7 @@ improve model inference speed. .. code:: ipython3 from notebook_utils import quantization_widget - + to_quantize = quantization_widget() to_quantize @@ -334,15 +340,15 @@ improve model inference speed. .. code:: ipython3 import requests - + OV_INT8_COLORIZER_PATH = Path("ddcolor_int8.xml") compiled_int8_model = None - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", ) open("skip_kernel_extension.py", "w").write(r.text) - + %load_ext skip_kernel_extension Collect quantization dataset @@ -357,12 +363,12 @@ dataset from Hugging Face as calibration data. .. 
code:: ipython3 %%skip not $to_quantize.value - + from datasets import load_dataset - + subset_size = 300 calibration_data = [] - + if not OV_INT8_COLORIZER_PATH.exists(): dataset = load_dataset("ummagumm-a/colorization_dataset", split="train", streaming=True).shuffle(seed=42).take(subset_size) for idx, batch in enumerate(dataset): @@ -374,7 +380,7 @@ dataset from Hugging Face as calibration data. img_l = cv2.cvtColor(np.stack([img, img, img], axis=2), cv2.COLOR_BGR2Lab)[:, :, :1] img_gray_lab = np.concatenate((img_l, np.zeros_like(img_l), np.zeros_like(img_l)), axis=-1) img_gray_rgb = cv2.cvtColor(img_gray_lab, cv2.COLOR_LAB2RGB) - + image = np.expand_dims(img_gray_rgb.transpose((2, 0, 1)).astype(np.float32), axis=0) calibration_data.append(image) @@ -386,9 +392,9 @@ Perform model quantization .. code:: ipython3 %%skip not $to_quantize.value - + import nncf - + if not OV_INT8_COLORIZER_PATH.exists(): ov_model = core.read_model(OV_COLORIZER_PATH) quantized_model = nncf.quantize( @@ -406,10 +412,10 @@ Perform model quantization .. parsed-literal:: - 2024-10-07 23:39:33.824396: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-07 23:39:33.863560: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-22 22:45:07.339219: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-22 22:45:07.378241: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-07 23:39:34.271973: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-22 22:45:07.784302: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -446,7 +452,7 @@ Run INT8 model inference .. code:: ipython3 from IPython.display import display - + if OV_INT8_COLORIZER_PATH.exists(): compiled_int8_model = core.compile_model(OV_INT8_COLORIZER_PATH, device.value) img = cv2.imread("DDColor/assets/test_images/Ansel Adams _ Moore Photography.jpeg") @@ -466,9 +472,9 @@ Compare FP16 and INT8 model size .. 
code:: ipython3 fp16_ir_model_size = OV_COLORIZER_PATH.with_suffix(".bin").stat().st_size / 2**20 - + print(f"FP16 model size: {fp16_ir_model_size:.2f} MB") - + if OV_INT8_COLORIZER_PATH.exists(): quantized_model_size = OV_INT8_COLORIZER_PATH.with_suffix(".bin").stat().st_size / 2**20 print(f"INT8 model size: {quantized_model_size:.2f} MB") @@ -507,17 +513,17 @@ Tool =2024.2.0" "datasets>=2.14.6" "nncf>=2.11.0" "tqdm" + %pip install -q "openvino>=2024.2.0" "datasets>=2.14.6" "nncf>=2.11.0" "tqdm" "matplotlib>=3.4" %pip install -q "typing-extensions>=4.9.0" eval-type-backport "gradio>=4.19" %pip install -q -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu @@ -240,7 +241,7 @@ is preprocessed image height, ``W`` is preprocessed image width. xFormers not available xFormers not available - /tmp/ipykernel_74875/1110356474.py:8: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /tmp/ipykernel_2586231/1110356474.py:8: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. model.load_state_dict(torch.load(model_path, map_location="cpu")) @@ -272,7 +273,7 @@ is preprocessed image height, ``W`` is preprocessed image width. .. parsed-literal:: - + @@ -306,13 +307,13 @@ loading on device using ``core.complie_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:73: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:73: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert H % patch_H == 0, f"Input image height {H} is not a multiple of patch height {patch_H}" - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:74: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:74: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert W % patch_W == 0, f"Input image width {W} is not a multiple of patch width: {patch_W}" - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2.py:183: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2.py:183: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if npatch == N and w == h: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dpt.py:147: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dpt.py:147: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
out = F.interpolate(out, (int(patch_h * 14), int(patch_w * 14)), mode="bilinear", align_corners=True) @@ -404,7 +405,7 @@ range. .. parsed-literal:: - + @@ -626,7 +627,7 @@ Run inference on video .. parsed-literal:: - Processed 60 frames in 14.01 seconds. Total FPS (including video processing): 4.28.Inference FPS: 9.46 + Processed 60 frames in 13.44 seconds. Total FPS (including video processing): 4.46.Inference FPS: 10.42 Video saved to 'output/Coco Walking in Berkeley_depth_anything.mp4'. @@ -653,7 +654,7 @@ Run inference on video .. parsed-literal:: Showing video saved at - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/output/Coco Walking in Berkeley_depth_anything.mp4 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/output/Coco Walking in Berkeley_depth_anything.mp4 If you cannot see the video in your browser, please click on the following link to download the video @@ -786,10 +787,10 @@ quantization code below may take some time. .. parsed-literal:: - 2024-10-07 23:47:57.736195: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-07 23:47:57.768920: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-22 22:53:01.689628: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-22 22:53:01.723459: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-07 23:47:58.341833: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-22 22:53:02.312695: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -915,10 +916,10 @@ data. .. parsed-literal:: - Processed 60 frames in 12.89 seconds. Total FPS (including video processing): 4.65.Inference FPS: 12.78 + Processed 60 frames in 12.93 seconds. Total FPS (including video processing): 4.64.Inference FPS: 12.78 Video saved to 'output/Coco Walking in Berkeley_depth_anything_int8.mp4'. 
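A note on the two throughput figures reported for the video runs: ``Inference FPS`` divides the frame count by the time spent inside the model only, while the total figure also includes reading, pre-/post-processing, and re-encoding the frames. The skeleton below illustrates that bookkeeping; it uses a dummy stand-in for the compiled model and synthetic frames so it runs on its own, and is not the notebook's actual processing loop.

.. code:: python

    import time

    import numpy as np


    def run_model(inputs):
        # Dummy stand-in for compiled_model(inputs), deliberately cheap.
        return inputs * 0.5


    frames = [np.random.rand(518, 518).astype(np.float32) for _ in range(60)]  # synthetic frames

    total_start = time.perf_counter()
    inference_time = 0.0

    for frame in frames:
        inputs = frame[None, None]                     # "preprocessing" stand-in
        start = time.perf_counter()
        _depth = run_model(inputs)                     # only this span counts as inference
        inference_time += time.perf_counter() - start
        # colorizing the depth map and writing the video frame would go here

    total_time = time.perf_counter() - total_start
    n_frames = len(frames)
    print(f"Total FPS (including video processing): {n_frames / total_time:.2f}")
    print(f"Inference FPS: {n_frames / inference_time:.2f}")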
Showing video saved at - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/output/Coco Walking in Berkeley_depth_anything.mp4 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/output/Coco Walking in Berkeley_depth_anything.mp4 If you cannot see the video in your browser, please click on the following link to download the video @@ -998,9 +999,9 @@ Tool =2023.3.0" "datasets>=2.14.6" "nncf" "tqdm" - %pip install -q "typing-extensions>=4.9.0" eval-type-backport "gradio>=4.19" + %pip install -q "typing-extensions>=4.9.0" eval-type-backport "gradio>=4.19" "matplotlib>=3.4" %pip install -q -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu if platform.python_version_tuple()[1] in ["8", "9"]: @@ -90,9 +90,9 @@ Prerequisites remote: Counting objects: 100% (161/161), done. remote: Compressing objects: 100% (120/120), done. remote: Total 441 (delta 115), reused 44 (delta 41), pack-reused 280 (from 1) - Receiving objects: 100% (441/441), 237.90 MiB | 23.22 MiB/s, done. + Receiving objects: 100% (441/441), 237.90 MiB | 25.06 MiB/s, done. Resolving deltas: 100% (158/158), done. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. WARNING: typer 0.12.5 does not provide the extra 'all' @@ -284,13 +284,13 @@ loading on device using ``core.complie_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/patch_embed.py:73: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/patch_embed.py:73: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert H % patch_H == 0, f"Input image height {H} is not a multiple of patch height {patch_H}" - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/patch_embed.py:74: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/patch_embed.py:74: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert W % patch_W == 0, f"Input image width {W} is not a multiple of patch width: {patch_W}" - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/vision_transformer.py:183: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/vision_transformer.py:183: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if npatch == N and w == h: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/depth_anything/dpt.py:133: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/depth_anything/dpt.py:133: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! out = F.interpolate(out, (int(patch_h * 14), int(patch_w * 14)), mode="bilinear", align_corners=True) @@ -573,7 +573,7 @@ Run inference on video .. parsed-literal:: - Processed 60 frames in 13.58 seconds. Total FPS (including video processing): 4.42.Inference FPS: 10.20 + Processed 60 frames in 13.51 seconds. Total FPS (including video processing): 4.44.Inference FPS: 10.15 Video saved to 'output/Coco Walking in Berkeley_depth_anything.mp4'. @@ -600,7 +600,7 @@ Run inference on video .. 
parsed-literal:: Showing video saved at - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/output/Coco Walking in Berkeley_depth_anything.mp4 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/output/Coco Walking in Berkeley_depth_anything.mp4 If you cannot see the video in your browser, please click on the following link to download the video @@ -733,10 +733,10 @@ quantization code below may take some time. .. parsed-literal:: - 2024-10-07 23:57:01.421550: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-07 23:57:01.453134: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-22 23:02:01.251241: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-22 23:02:01.284565: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-07 23:57:02.034824: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-22 23:02:01.862503: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -862,10 +862,10 @@ data. .. parsed-literal:: - Processed 60 frames in 13.06 seconds. Total FPS (including video processing): 4.59.Inference FPS: 12.23 + Processed 60 frames in 12.84 seconds. Total FPS (including video processing): 4.67.Inference FPS: 12.71 Video saved to 'output/Coco Walking in Berkeley_depth_anything_int8.mp4'. 
Showing video saved at - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/output/Coco Walking in Berkeley_depth_anything.mp4 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/output/Coco Walking in Berkeley_depth_anything.mp4 If you cannot see the video in your browser, please click on the following link to download the video @@ -945,9 +945,9 @@ Tool =2023.1.0" - + import os import requests + from pathlib import Path + import platform + + + os.environ["GIT_CLONE_PROTECTION_ACTIVE"] = "false" + + if not Path("notebook_utils.py").exists(): + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + with open("notebook_utils.py", "w") as f: + f.write(r.text) - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - open("notebook_utils.py", "w").write(r.text) + if not Path("pip_helper.py").exists(): + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/pip_helper.py", + ) + open("pip_helper.py", "w").write(r.text) + + from pip_helper import pip_install + + if platform.system() == "Darwin": + pip_install("numpy<2.0.0") + pip_install("torch", "torchvision", "opencv-python", "wheel", "--extra-index-url", "https://download.pytorch.org/whl/cpu") + pip_install("git+https://github.com/facebookresearch/detectron2.git", "--extra-index-url", "https://download.pytorch.org/whl/cpu") + pip_install("openvino>=2023.1.0") .. parsed-literal:: - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. 
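The updated cell installs its dependencies through ``pip_install`` from the downloaded ``pip_helper.py`` rather than the ``%pip`` magic, which is why the captured output below looks like plain ``pip`` console output. Conceptually the helper just invokes pip with the interpreter that runs the notebook; the following is a rough, hypothetical sketch of that idea, not the actual contents of ``pip_helper.py``:

.. code:: python

    import subprocess
    import sys

    def pip_install(*args):
        # Run "python -m pip install <args>" with the current interpreter,
        # so packages land in the environment executing the notebook.
        subprocess.check_call([sys.executable, "-m", "pip", "install", *args])

    # Example usage: pip_install("openvino>=2023.1.0")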
+ Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cpu + Requirement already satisfied: torch in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2.4.1+cpu) + Requirement already satisfied: torchvision in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (0.19.1+cpu) + Requirement already satisfied: opencv-python in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.10.0.84) + Requirement already satisfied: wheel in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (0.44.0) + Requirement already satisfied: filelock in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.16.1) + Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (4.12.2) + Requirement already satisfied: sympy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (1.13.3) + Requirement already satisfied: networkx in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1) + Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1.4) + Requirement already satisfied: fsspec in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (2024.9.0) + Requirement already satisfied: numpy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torchvision) (1.23.5) + Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torchvision) (10.4.0) + Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch) (2.1.5) + Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from sympy->torch) (1.3.0) + Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cpu + Collecting git+https://github.com/facebookresearch/detectron2.git + Cloning https://github.com/facebookresearch/detectron2.git to /tmp/pip-req-build-we1e_5gi +.. 
parsed-literal:: + Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/detectron2.git /tmp/pip-req-build-we1e_5gi -.. parsed-literal:: - 24692 +.. parsed-literal:: + Resolved https://github.com/facebookresearch/detectron2.git to commit 8d85329aed8506ea3672e3e208971345973ea761 + Preparing metadata (setup.py): started + Preparing metadata (setup.py): finished with status 'done' + Requirement already satisfied: Pillow>=7.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (10.4.0) + Requirement already satisfied: black in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (24.3.0) + Collecting cloudpickle (from detectron2==0.6) + Using cached cloudpickle-3.1.0-py3-none-any.whl.metadata (7.0 kB) + Collecting fvcore<0.1.6,>=0.1.5 (from detectron2==0.6) + Using cached fvcore-0.1.5.post20221221-py3-none-any.whl + Collecting hydra-core>=1.1 (from detectron2==0.6) + Using cached hydra_core-1.3.2-py3-none-any.whl.metadata (5.5 kB) + Collecting iopath<0.1.10,>=0.1.7 (from detectron2==0.6) + Using cached iopath-0.1.9-py3-none-any.whl.metadata (370 bytes) + Requirement already satisfied: matplotlib in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (3.7.5) + Requirement already satisfied: omegaconf<2.4,>=2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (2.3.0) + Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (24.1) + Collecting pycocotools>=2.0.2 (from detectron2==0.6) + Using cached pycocotools-2.0.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.1 kB) + Requirement already satisfied: tabulate in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (0.9.0) + Requirement already satisfied: tensorboard in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (2.12.3) + Requirement already satisfied: termcolor>=1.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (2.4.0) + Requirement already satisfied: tqdm>4.29.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (4.66.5) + Collecting yacs>=0.1.8 (from detectron2==0.6) + Using cached yacs-0.1.8-py3-none-any.whl.metadata (639 bytes) + Requirement already satisfied: numpy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from fvcore<0.1.6,>=0.1.5->detectron2==0.6) (1.23.5) + Requirement already satisfied: pyyaml>=5.1 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from fvcore<0.1.6,>=0.1.5->detectron2==0.6) (6.0.2) + Requirement already satisfied: antlr4-python3-runtime==4.9.* in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.1->detectron2==0.6) (4.9.3) + Requirement already satisfied: importlib-resources in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.1->detectron2==0.6) (6.4.5) + Collecting portalocker (from iopath<0.1.10,>=0.1.7->detectron2==0.6) + Using cached portalocker-2.10.1-py3-none-any.whl.metadata (8.5 kB) + Requirement already satisfied: contourpy>=1.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (1.1.1) + Requirement already satisfied: cycler>=0.10 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (0.12.1) + Requirement already satisfied: fonttools>=4.22.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (4.54.1) + Requirement already satisfied: kiwisolver>=1.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (1.4.7) + Requirement already satisfied: pyparsing>=2.3.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (3.1.4) + Requirement already satisfied: python-dateutil>=2.7 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (2.9.0.post0) + Requirement already satisfied: click>=8.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (8.1.7) + Requirement already satisfied: mypy-extensions>=0.4.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (1.0.0) + Requirement already satisfied: pathspec>=0.9.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (0.12.1) + Requirement already satisfied: platformdirs>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (4.3.6) + Requirement already satisfied: tomli>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) 
(2.0.2) + Requirement already satisfied: typing-extensions>=4.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (4.12.2) + Requirement already satisfied: absl-py>=0.4 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (1.4.0) + Requirement already satisfied: grpcio>=1.48.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (1.67.0) + Requirement already satisfied: google-auth<3,>=1.6.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (2.35.0) + Requirement already satisfied: google-auth-oauthlib<1.1,>=0.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (1.0.0) + Requirement already satisfied: markdown>=2.6.8 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (3.7) + Requirement already satisfied: protobuf>=3.19.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (3.20.3) + Requirement already satisfied: requests<3,>=2.21.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (2.32.3) + Requirement already satisfied: setuptools>=41.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (44.0.0) + Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (0.7.2) + Requirement already satisfied: werkzeug>=1.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (3.0.4) + Requirement already satisfied: wheel>=0.26 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (0.44.0) + Requirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (5.5.0) + Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from 
google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (0.4.1) + Requirement already satisfied: rsa<5,>=3.1.4 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (4.9) + Requirement already satisfied: requests-oauthlib>=0.7.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth-oauthlib<1.1,>=0.5->tensorboard->detectron2==0.6) (2.0.0) + Requirement already satisfied: zipp>=3.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from importlib-resources->hydra-core>=1.1->detectron2==0.6) (3.20.2) + Requirement already satisfied: importlib-metadata>=4.4 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from markdown>=2.6.8->tensorboard->detectron2==0.6) (8.5.0) + Requirement already satisfied: six>=1.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from python-dateutil>=2.7->matplotlib->detectron2==0.6) (1.16.0) + Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (3.4.0) + Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (3.10) + Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (2.2.3) + Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (2024.8.30) + Requirement already satisfied: MarkupSafe>=2.1.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from werkzeug>=1.0.1->tensorboard->detectron2==0.6) (2.1.5) + Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (0.6.1) + Requirement already satisfied: oauthlib>=3.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard->detectron2==0.6) (3.2.2) + Using cached hydra_core-1.3.2-py3-none-any.whl (154 kB) + Using cached iopath-0.1.9-py3-none-any.whl (27 kB) + Using cached 
pycocotools-2.0.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (439 kB) + Using cached yacs-0.1.8-py3-none-any.whl (14 kB) + Using cached cloudpickle-3.1.0-py3-none-any.whl (22 kB) + Using cached portalocker-2.10.1-py3-none-any.whl (18 kB) + Building wheels for collected packages: detectron2 + Building wheel for detectron2 (setup.py): started + Building wheel for detectron2 (setup.py): finished with status 'done' + Created wheel for detectron2: filename=detectron2-0.6-cp38-cp38-linux_x86_64.whl size=8313552 sha256=23ceb6e5b734ecc530172b613be139d732deaa2e962d5a8bc940e6b23a85309d + Stored in directory: /tmp/pip-ephem-wheel-cache-65iaghs7/wheels/19/ac/65/e48e5e4ec2702274d927c5a6efb75709b24014371d3bb778f2 + Successfully built detectron2 + Installing collected packages: yacs, portalocker, cloudpickle, iopath, hydra-core, pycocotools, fvcore, detectron2 + Successfully installed cloudpickle-3.1.0 detectron2-0.6 fvcore-0.1.5.post20221221 hydra-core-1.3.2 iopath-0.1.9 portalocker-2.10.1 pycocotools-2.0.7 yacs-0.1.8 + Requirement already satisfied: openvino>=2023.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2024.4.0) + Requirement already satisfied: numpy<2.1.0,>=1.16.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (1.23.5) + Requirement already satisfied: openvino-telemetry>=2023.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (2024.1.0) + Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (24.1) Define helpers for PyTorch model initialization and conversion diff --git a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.jpg b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.jpg index ffb8e4a0030770..6160562199f757 100644 --- a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.jpg +++ b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2341ffe8acbda0ad14e43fca01d72733855b5bde3b29601f9bbeaa4d6ff41207 -size 58357 +oid sha256:b6475f9155bca7152327255fb36bac32dea6f10aa834b6a91d46d4ecc718be0f +size 58279 diff --git a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.png b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.png index 561c47897650fb..77ac4e5e0c1fd8 100644 --- a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.png +++ b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ce0e6f8e05d4a4e52b304aa95e729a9fac0def06f80f61feacf0405f95dbb31f -size 509296 +oid sha256:b98f4e2e56bb62e6d7c68e536d05695f16b412d4bb8a502f5ced3c466716fe91 +size 508620 diff --git 
a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.jpg b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.jpg index e8c8278cf4c90d..98087b7c1f10f2 100644 --- a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.jpg +++ b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:47fc91d79709effd086bc71f8586af4bc47ce40a460e1d886a10fb3abf0ce2d8 -size 56091 +oid sha256:02cb8226997c872abaee3c70c1ab90c8e6ac078adb81ec6d0721ece6049e7af3 +size 54825 diff --git a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.png b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.png index cf32ec81286190..e317a3a3005ef6 100644 --- a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.png +++ b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:afd0387776f050660082e8adde8af8958eaf586f20e55da75b392f53362487ca -size 459024 +oid sha256:940552cfe8d62878a7b2fb827529df2ef6ef4fa135f82f578362142650e0d751 +size 457741 diff --git a/docs/notebooks/distil-whisper-asr-with-output.rst b/docs/notebooks/distil-whisper-asr-with-output.rst index fdd81327b5675a..2cdecfe17a19d4 100644 --- a/docs/notebooks/distil-whisper-asr-with-output.rst +++ b/docs/notebooks/distil-whisper-asr-with-output.rst @@ -85,9 +85,9 @@ Prerequisites .. code:: ipython3 - %pip install -q "transformers>=4.35" "torch>=2.1,<2.4.0" "torchvision<0.19.0" "onnx<1.16.2" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "transformers>=4.35" "torch>=2.4.1" "onnx!=1.16.2" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q "git+https://github.com/huggingface/optimum-intel.git" - %pip install -q "openvino>=2023.2.0" datasets "gradio>=4.0" "librosa" "soundfile" + %pip install -q "openvino>=2023.2.0" datasets "gradio>=4.19" "librosa" "soundfile" %pip install -q "nncf>=2.6.0" "jiwer" import requests diff --git a/docs/notebooks/distilbert-sequence-classification-with-output.rst b/docs/notebooks/distilbert-sequence-classification-with-output.rst index 463a8051cf4d8b..5f069551357b6d 100644 --- a/docs/notebooks/distilbert-sequence-classification-with-output.rst +++ b/docs/notebooks/distilbert-sequence-classification-with-output.rst @@ -47,31 +47,31 @@ Imports .. 
parsed-literal:: Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cpu - Requirement already satisfied: openvino>=2023.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2024.4.0) - Requirement already satisfied: transformers in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.45.2) - Requirement already satisfied: torch>=2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2.4.1+cpu) - Requirement already satisfied: tqdm in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.66.5) - Requirement already satisfied: numpy<2.1.0,>=1.16.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (1.23.5) - Requirement already satisfied: openvino-telemetry>=2023.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (2024.1.0) - Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (24.1) - Requirement already satisfied: filelock in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (3.16.1) - Requirement already satisfied: huggingface-hub<1.0,>=0.23.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.25.1) - Requirement already satisfied: pyyaml>=5.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (6.0.2) - Requirement already satisfied: regex!=2019.12.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (2024.9.11) - Requirement already satisfied: requests in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (2.32.3) - Requirement already satisfied: safetensors>=0.4.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.4.5) - Requirement already satisfied: tokenizers<0.21,>=0.20 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.20.0) - Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (4.12.2) - Requirement already 
satisfied: sympy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (1.13.3) - Requirement already satisfied: networkx in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (3.1) - Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (3.1.4) - Requirement already satisfied: fsspec in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (2024.6.1) - Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch>=2.1) (2.1.5) - Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (3.3.2) - Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (3.10) - Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (2.2.3) - Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (2024.8.30) - Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from sympy->torch>=2.1) (1.3.0) + Requirement already satisfied: openvino>=2023.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2024.4.0) + Requirement already satisfied: transformers in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.45.2) + Requirement already satisfied: torch>=2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2.4.1+cpu) + Requirement already satisfied: tqdm in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.66.5) + Requirement already satisfied: numpy<2.1.0,>=1.16.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (1.23.5) + Requirement already satisfied: openvino-telemetry>=2023.2.1 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (2024.1.0) + Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (24.1) + Requirement already satisfied: filelock in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (3.16.1) + Requirement already satisfied: huggingface-hub<1.0,>=0.23.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.26.1) + Requirement already satisfied: pyyaml>=5.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (6.0.2) + Requirement already satisfied: regex!=2019.12.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (2024.9.11) + Requirement already satisfied: requests in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (2.32.3) + Requirement already satisfied: safetensors>=0.4.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.4.5) + Requirement already satisfied: tokenizers<0.21,>=0.20 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.20.1) + Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (4.12.2) + Requirement already satisfied: sympy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (1.13.3) + Requirement already satisfied: networkx in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (3.1) + Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (3.1.4) + Requirement already satisfied: fsspec in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (2024.9.0) + Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch>=2.1) (2.1.5) + Requirement already satisfied: charset-normalizer<4,>=2 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (3.4.0) + Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (3.10) + Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (2.2.3) + Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (2024.8.30) + Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from sympy->torch>=2.1) (1.3.0) Note: you may need to restart the kernel to use updated packages. @@ -166,9 +166,9 @@ optimal execution on end-point target devices. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4779: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4779: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/distilbert/modeling_distilbert.py:215: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/distilbert/modeling_distilbert.py:215: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. mask, torch.tensor(torch.finfo(scores.dtype).min) @@ -274,7 +274,7 @@ For a single input sentence .. 
parsed-literal:: Label: POSITIVE - Total Time: 0.02 seconds + Total Time: 0.03 seconds Read from a text file diff --git a/docs/notebooks/dolly-2-instruction-following-with-output.rst b/docs/notebooks/dolly-2-instruction-following-with-output.rst index 01d4b8fed8bb57..9f6857b608d962 100644 --- a/docs/notebooks/dolly-2-instruction-following-with-output.rst +++ b/docs/notebooks/dolly-2-instruction-following-with-output.rst @@ -214,10 +214,10 @@ you can add ``--sym``. For INT4 quantization you can also specify the following arguments : - The ``--group-size`` parameter will define the group size to use for -quantization, -1 it will results in per-column quantization. + quantization; -1 results in per-column quantization. - The ``--ratio`` parameter controls the ratio between 4-bit and 8-bit -quantization. If set to 0.9, it means that 90% of the layers will be -quantized to int4 while 10% will be quantized to int8. + quantization. If set to 0.9, it means that 90% of the layers will be + quantized to int4 while 10% will be quantized to int8. Smaller group_size and ratio values usually improve accuracy at the sacrifice of the model size and inference latency. diff --git a/docs/notebooks/dynamicrafter-animating-images-with-output.rst b/docs/notebooks/dynamicrafter-animating-images-with-output.rst index 194282459f56cb..584c3442c94af2 100644 --- a/docs/notebooks/dynamicrafter-animating-images-with-output.rst +++ b/docs/notebooks/dynamicrafter-animating-images-with-output.rst @@ -189,10 +189,10 @@ Prerequisites remote: Counting objects: 100% (153/153), done. remote: Compressing objects: 100% (99/99), done. remote: Total 335 (delta 97), reused 54 (delta 54), pack-reused 182 (from 1) - Receiving objects: 100% (335/335), 72.41 MiB | 19.06 MiB/s, done. + Receiving objects: 100% (335/335), 72.41 MiB | 22.40 MiB/s, done. Resolving deltas: 100% (123/123), done. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images @@ -282,7 +282,7 @@ We will use model for 256x256 resolution as example. Also, models for .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/huggingface_hub/file_download.py:1204: UserWarning: `local_dir_use_symlinks` parameter is deprecated and will be ignored. The process to download files to a local folder has been updated and do not rely on symlinks anymore. You only need to pass a destination folder as`local_dir`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/huggingface_hub/file_download.py:834: UserWarning: `local_dir_use_symlinks` parameter is deprecated and will be ignored. The process to download files to a local folder has been updated and do not rely on symlinks anymore. You only need to pass a destination folder as`local_dir`.
For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder. warnings.warn( @@ -296,6 +296,16 @@ We will use model for 256x256 resolution as example. Also, models for .. parsed-literal:: AE working on z of shape (1, 4, 32, 32) = 4096 dimensions. + + +.. parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers + warnings.warn(f"Importing from {__name__} is deprecated, please import via timm.layers", FutureWarning) + + +.. parsed-literal:: + >>> model checkpoint loaded. @@ -399,43 +409,43 @@ resolutions. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/utils/image.py:226: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/utils/image.py:226: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if input.numel() == 0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:573: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:573: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if size == input_size: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:579: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:579: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! antialias = antialias and (max(factors) > 1) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:581: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:581: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if antialias: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:584: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:584: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! sigmas = (max((factors[0] - 1.0) / 2.0, 0.001), max((factors[1] - 1.0) / 2.0, 0.001)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:589: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:589: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! ks = int(max(2.0 * 2 * sigmas[0], 3)), int(max(2.0 * 2 * sigmas[1], 3)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:589: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:589: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! ks = int(max(2.0 * 2 * sigmas[0], 3)), int(max(2.0 * 2 * sigmas[1], 3)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/gaussian.py:55: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/gaussian.py:55: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. sigma = tensor([sigma], device=input.device, dtype=input.dtype) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/gaussian.py:55: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/gaussian.py:55: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! sigma = tensor([sigma], device=input.device, dtype=input.dtype) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/core/check.py:78: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/core/check.py:78: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if x_shape_to_check[i] != dim: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/kernels.py:92: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/kernels.py:92: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. mean = tensor([[mean]], device=sigma.device, dtype=sigma.dtype) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:101: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:101: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if len(mean.shape) == 0 or mean.shape[0] == 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if len(std.shape) == 0 or std.shape[0] == 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:107: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:107: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if mean.shape and mean.shape[0] != 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:108: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:108: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if mean.shape[0] != data.shape[1] and mean.shape[:2] != data.shape[:2]: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:112: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:112: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if std.shape and std.shape[0] != 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:113: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:113: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if std.shape[0] != data.shape[1] and std.shape[:2] != data.shape[:2]: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:116: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:116: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. mean = torch.as_tensor(mean, device=data.device, dtype=data.dtype) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:117: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:117: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. std = torch.as_tensor(std, device=data.device, dtype=data.dtype) @@ -464,7 +474,7 @@ Convert AE encoder .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/ae_modules.py:67: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/ae_modules.py:67: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! w_ = w_ * (int(c)**(-0.5)) @@ -508,15 +518,15 @@ Convert Diffusion U-Net model .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:556: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:556: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if l_context == 77 + t*16: ## !!! HARD CODE here - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:205: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:205: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if batch_size: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:232: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:232: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if self.use_temporal_conv and batch_size: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:76: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:76: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert x.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:99: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:99: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert x.shape[1] == self.channels @@ -903,14 +913,14 @@ Run OpenVINO pipeline inference .. parsed-literal:: Seed set to 234 - /tmp/ipykernel_79693/2451984876.py:25: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:206.) + /tmp/ipykernel_2590985/2451984876.py:25: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:206.) img_tensor = torch.from_numpy(image).permute(2, 0, 1).float().to(model.device) .. parsed-literal:: - start: man fishing in a boat at sunset 2024-10-08 00:11:25 - Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 194.23 seconds + start: man fishing in a boat at sunset 2024-10-22 23:16:18 + Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 196.25 seconds .. code:: ipython3 @@ -1162,10 +1172,10 @@ quantization time. .. parsed-literal:: - 2024-10-08 00:40:44.424263: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 00:40:44.462873: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-22 23:45:51.693284: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. 
You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-22 23:45:51.735392: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 00:40:45.077046: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-22 23:45:52.354791: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -1347,8 +1357,8 @@ Let’s run the optimized pipeline .. parsed-literal:: - start: man fishing in a boat at sunset 2024-10-08 01:40:46 - Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 98.42 seconds + start: man fishing in a boat at sunset 2024-10-23 00:47:13 + Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 98.40 seconds .. code:: ipython3 @@ -1453,9 +1463,9 @@ models, we use median inference time on calibration subset. .. parsed-literal:: - FP32 latency: 193.245 - INT8 latency: 97.168 - Performance speed up: 1.989 + FP32 latency: 195.358 + INT8 latency: 97.265 + Performance speed up: 2.009 Interactive inference diff --git a/docs/notebooks/efficient-sam-with-output.rst b/docs/notebooks/efficient-sam-with-output.rst index e9c5d2f07afec7..8d725c4594afc6 100644 --- a/docs/notebooks/efficient-sam-with-output.rst +++ b/docs/notebooks/efficient-sam-with-output.rst @@ -82,20 +82,12 @@ Prerequisites .. code:: ipython3 - import platform - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" - - %pip install -q "openvino>=2023.3.0" "nncf>=2.7.0" opencv-python "gradio>=4.13" torch torchvision tqdm --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "openvino>=2023.3.0" "nncf>=2.7.0" opencv-python "gradio>=4.13" "matplotlib>=3.4" torch torchvision tqdm --extra-index-url https://download.pytorch.org/whl/cpu .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. .. code:: ipython3 @@ -116,9 +108,9 @@ Prerequisites remote: Counting objects: 100% (85/85), done. remote: Compressing objects: 100% (33/33), done. remote: Total 424 (delta 76), reused 52 (delta 52), pack-reused 339 (from 1) - Receiving objects: 100% (424/424), 262.14 MiB | 22.33 MiB/s, done. + Receiving objects: 100% (424/424), 262.14 MiB | 24.94 MiB/s, done. Resolving deltas: 100% (246/246), done. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM .. code:: ipython3 @@ -385,23 +377,23 @@ disk using ``openvino.save_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:220: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:220: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if ( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:241: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:241: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert ( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:163: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:163: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! size = int(math.sqrt(xy_num)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
assert size * size == xy_num - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:166: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:166: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if size != h or size != w: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:251: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:251: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert x.shape[2] == num_patches - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if num_pts > self.decoder_max_num_input_points: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:92: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:92: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! elif num_pts < self.decoder_max_num_input_points: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:126: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:126: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if output_w > 0 and output_h > 0: @@ -648,10 +640,10 @@ architecture type, we should specify ``transformer`` in ``model_type``. .. parsed-literal:: - 2024-10-08 01:57:55.723142: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 01:57:55.754489: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-23 01:04:24.934254: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-23 01:04:24.966235: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 01:57:56.401127: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-23 01:04:25.612813: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -818,7 +810,7 @@ models, we use ``bencmark_app``. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 29.82 ms + [ INFO ] Read model took 30.59 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] batched_images (node: batched_images) : f32 / [...] / [?,?,?,?] @@ -838,7 +830,7 @@ models, we use ``bencmark_app``. [ INFO ] ***NO_NAME*** (node: aten::reshape/Reshape_3) : f32 / [...] / [?,?,?,?,?] 
[ INFO ] ***NO_NAME*** (node: aten::reshape/Reshape_2) : f32 / [...] / [?,?,?] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 1394.30 ms + [ INFO ] Compile model took 1397.68 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -879,17 +871,17 @@ models, we use ``bencmark_app``. [ INFO ] Fill input 'batched_point_labels' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in full mode (inputs filling are included in measurement loop). - [ INFO ] First inference took 815.67 ms + [ INFO ] First inference took 822.58 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 54 iterations - [ INFO ] Duration: 16885.27 ms + [ INFO ] Count: 51 iterations + [ INFO ] Duration: 16272.65 ms [ INFO ] Latency: - [ INFO ] Median: 1856.65 ms - [ INFO ] Average: 1850.85 ms - [ INFO ] Min: 1459.90 ms - [ INFO ] Max: 2009.04 ms - [ INFO ] Throughput: 3.20 FPS + [ INFO ] Median: 1835.79 ms + [ INFO ] Average: 1847.61 ms + [ INFO ] Min: 1271.60 ms + [ INFO ] Max: 2300.40 ms + [ INFO ] Throughput: 3.13 FPS .. code:: ipython3 @@ -915,7 +907,7 @@ models, we use ``bencmark_app``. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 44.15 ms + [ INFO ] Read model took 43.92 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] batched_images (node: batched_images) : f32 / [...] / [?,?,?,?] @@ -935,7 +927,7 @@ models, we use ``bencmark_app``. [ INFO ] ***NO_NAME*** (node: aten::reshape/Reshape_3) : f32 / [...] / [?,?,?,?,?] [ INFO ] ***NO_NAME*** (node: aten::reshape/Reshape_2) : f32 / [...] / [?,?,?] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 1618.00 ms + [ INFO ] Compile model took 1623.93 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -976,17 +968,17 @@ models, we use ``bencmark_app``. [ INFO ] Fill input 'batched_point_labels' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in full mode (inputs filling are included in measurement loop). - [ INFO ] First inference took 587.18 ms + [ INFO ] First inference took 578.24 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 58 iterations - [ INFO ] Duration: 16436.53 ms + [ INFO ] Count: 55 iterations + [ INFO ] Duration: 15797.98 ms [ INFO ] Latency: - [ INFO ] Median: 1670.45 ms - [ INFO ] Average: 1680.37 ms - [ INFO ] Min: 1321.28 ms - [ INFO ] Max: 2532.97 ms - [ INFO ] Throughput: 3.53 FPS + [ INFO ] Median: 1695.87 ms + [ INFO ] Average: 1683.49 ms + [ INFO ] Min: 550.40 ms + [ INFO ] Max: 1833.79 ms + [ INFO ] Throughput: 3.48 FPS Interactive segmentation demo @@ -1316,7 +1308,7 @@ Interactive segmentation demo .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam Running on local URL: http://127.0.0.1:7860 To create a public link, set `share=True` in `launch()`. 
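For reference, this is roughly what that launch call looks like (a sketch only, not taken from the notebook — ``demo`` stands for whatever Gradio interface the notebook built in the cells above, and the fallback behavior is an assumption):

.. code:: ipython3

    # Sketch: expose the Gradio demo via a public link, as suggested by the log above.
    # `demo` is assumed to be the gradio.Interface / gradio.Blocks object created earlier.
    try:
        demo.launch(share=True, debug=False)
    except Exception:
        # Fall back to local-only serving if a public tunnel cannot be created.
        demo.launch(share=False, debug=False)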
diff --git a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_17_1.png b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_17_1.png index 8854bf68943a42..c6c2e7d72e694a 100644 --- a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_17_1.png +++ b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_17_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a0ed908091274e314601740b2be6f08a06b74532f09d98e99703a91f1155ccd4 -size 1260810 +oid sha256:ee7d364aeabe9a2787785e4d0ee5bb8951f530f90a0a625c9f31047bdc157b59 +size 1261144 diff --git a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_25_1.png b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_25_1.png index 45da467e43595b..ff03bff6360a90 100644 --- a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_25_1.png +++ b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_25_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:444eaadeac9bde960bc08fc6eed2ca8c5d5de854782d9ea322535ec1e35a38b0 -size 1261402 +oid sha256:40a049edc89f5339af256b80e0fc99740d9e3b6d3159e6e9d09e78683d24e0fe +size 1261722 diff --git a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_36_1.png b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_36_1.png index 91cbceb5a9bc44..766db096877cba 100644 --- a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_36_1.png +++ b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_36_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:90b937884a9df8f888beb020e5a56dba5ee941f780ac61dd0fda8502909038d2 -size 1261809 +oid sha256:f4fcf4eecf855fa1a9ca72c0dbf5c070e11a4a520873971202b592bc65dd2ccc +size 1261487 diff --git a/docs/notebooks/encodec-audio-compression-with-output.rst b/docs/notebooks/encodec-audio-compression-with-output.rst index 5036c6f32a2259..394c6e5b92e9bf 100644 --- a/docs/notebooks/encodec-audio-compression-with-output.rst +++ b/docs/notebooks/encodec-audio-compression-with-output.rst @@ -142,7 +142,7 @@ bandwidth. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. WeightNorm.apply(module, name, dim) @@ -302,7 +302,7 @@ similar as possible to the original. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. WeightNorm.apply(module, name, dim) @@ -402,13 +402,13 @@ with ``ov.save_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:60: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:60: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! ideal_length = (math.ceil(n_frames) - 1) * stride + (kernel_size - padding_total) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert padding_left >= 0 and padding_right >= 0, (padding_left, padding_right) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:87: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:87: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
max_pad = max(padding_left, padding_right) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:89: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:89: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if length <= max_pad: @@ -428,11 +428,11 @@ with ``ov.save_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:358: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:358: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. quantized_out = torch.tensor(0.0, device=q_indices.device) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:359: TracerWarning: Iterating over a tensor might cause the trace to be incorrect. Passing a tensor of different shape won't change the number of iterations executed (and might lead to errors or silently give incorrect results). + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:359: TracerWarning: Iterating over a tensor might cause the trace to be incorrect. Passing a tensor of different shape won't change the number of iterations executed (and might lead to errors or silently give incorrect results). for i, indices in enumerate(q_indices): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert (padding_left + padding_right) <= x.shape[-1] diff --git a/docs/notebooks/explainable-ai-1-basic-with-output.rst b/docs/notebooks/explainable-ai-1-basic-with-output.rst index d04827023dc979..1df31312fd752f 100644 --- a/docs/notebooks/explainable-ai-1-basic-with-output.rst +++ b/docs/notebooks/explainable-ai-1-basic-with-output.rst @@ -72,7 +72,9 @@ Guide =2024.2.0" opencv-python tqdm # Install openvino xai package - %pip install -q --no-deps "openvino-xai>=1.0.0" + %pip install -q --no-deps "openvino-xai>=1.1.0" + %pip install -q -U "numpy==1.*" + %pip install -q scipy if platform.system() != "Windows": %pip install -q "matplotlib>=3.4" @@ -128,12 +130,6 @@ Download the Model and data samples else: print(f"{model_name} already downloaded to {base_artifacts_dir}") - -.. parsed-literal:: - - v3-small_224_1.0_float already downloaded to artifacts - - Select inference device ----------------------- @@ -146,15 +142,6 @@ select device from dropdown list for running inference using OpenVINO device = device_widget() device - - - -.. parsed-literal:: - - Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') - - - Load the Model -------------- @@ -187,7 +174,7 @@ Load an Image # Reshape to model input shape. input_image = np.expand_dims(input_image, 0) - plt.imshow(image); + plt.imshow(image) .. parsed-literal:: @@ -196,7 +183,15 @@ Load an Image -.. image:: explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_11_1.png + +.. parsed-literal:: + + + + + + +.. image:: explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_11_2.png Do Inference @@ -218,12 +213,6 @@ Do Inference imagenet_classes = imagenet_filename.read_text().splitlines() - -.. parsed-literal:: - - 'data/imagenet_2012.txt' already exists. - - .. code:: ipython3 # The model description states that for this model, class 0 is a background. @@ -276,23 +265,22 @@ saliency_map}). For classification, targets are indices of the classes. explanation = explainer( data=input_image, targets=result_index, # can be a single target or a container of targets - overlay=True, # saliency map overlay over the input image, defaults to False + label_names=imagenet_classes, # optional, list of label names + overlay=True, # saliency map overlays over the input image, defaults to False ) - plt.imshow(explanation.saliency_map[result_index]) - plt.title(f"Saliency map of the {result_index} class.") - - - - -.. parsed-literal:: - - Text(0.5, 1.0, 'Saliency map of the 206 class.') + explanation.plot() +.. image:: explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_19_0.png -.. image:: explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_19_1.png +Note: by default, overlay is applied over the image in the ``data`` +argument. In this case, ``data`` was preprocessed (e.g. resized to +224x224), but still recognizable by human. 
In order for the overlay to +applied over the original image, provide original image with +``original_image`` argument (please refer to `OpenVINO™ Explainable AI +Toolkit (2/3): Deep Dive `__). Above saliency map can help to answer the question: “Which part of the image mostly contributes to the model predicted class: (206, ‘n02099267 diff --git a/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_11_1.png b/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_11_1.png deleted file mode 100644 index 156c14c9b4af72..00000000000000 --- a/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_11_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:800d793a4cc16c26b283c899a8eab37260ca4711929b45c8206fc124aa75ab99 -size 387941 diff --git a/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_11_2.png b/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_11_2.png new file mode 100644 index 00000000000000..a8fc791b3e4c52 --- /dev/null +++ b/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_11_2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55b1a955ec7a4a7394f905837b1a1686d3bb5130565eb9d4901eade821e6757c +size 387941 diff --git a/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_19_0.png b/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_19_0.png new file mode 100644 index 00000000000000..c6484cb68eeb33 --- /dev/null +++ b/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_19_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26d8ae99fc5c2d7573e5243b1711dab83c7f4658aa423d067ebd17fd87d336c5 +size 351476 diff --git a/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_19_1.png b/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_19_1.png deleted file mode 100644 index 00a5b1e39fb4d5..00000000000000 --- a/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_19_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a6207cc5c8c5fbfe4af1d74ef84cbdc03b86e79ed396b6a28bbb1fe2d9a176f7 -size 242785 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output.rst b/docs/notebooks/explainable-ai-2-deep-dive-with-output.rst index f96778061cc390..4e2ad0970661d2 100644 --- a/docs/notebooks/explainable-ai-2-deep-dive-with-output.rst +++ b/docs/notebooks/explainable-ai-2-deep-dive-with-output.rst @@ -57,31 +57,31 @@ predicting a particular label. 
- `Preprocess image for MobileNet <#preprocess-image-for-mobilenet>`__ -- `Basic usage: Auto mode - explainer <#basic-usage-auto-mode-explainer>`__ +- `Basic usage: Explainer in AUTO + mode <#basic-usage-explainer-in-auto-mode>`__ - - `Create Explainer <#create-explainer>`__ - - `Do explanation <#do-explanation>`__ + - `Create Explainer object <#create-explainer-object>`__ + - `Generate explanation <#generate-explanation>`__ - `Visualize saliency maps <#visualize-saliency-maps>`__ - `Save saliency maps <#save-saliency-maps>`__ - - `Return saliency maps for all - classes <#return-saliency-maps-for-all-classes>`__ + - `Generate saliency maps for all + classes <#generate-saliency-maps-for-all-classes>`__ - `Pre- and post-process functions <#pre--and-post-process-functions>`__ - `Visualization Parameters <#visualization-parameters>`__ -- `White-box explainer <#white-box-explainer>`__ +- `Explainer in WHITEBOX mode <#explainer-in-whitebox-mode>`__ - - `ReciproCAM explain method <#reciprocam-explain-method>`__ + - `ReciproCAM XAI method <#reciprocam-xai-method>`__ - `Insert XAI branch <#insert-xai-branch>`__ - `Insertion-related parameters <#insertion-related-parameters>`__ -- `Black-box explainer <#black-box-explainer>`__ +- `Explainer in BLACKBOX mode <#explainer-in-blackbox-mode>`__ - `Advanced <#advanced>`__ - `Import ImageNet label names and add them to saliency maps <#import-imagenet-label-names-and-add-them-to-saliency-maps>`__ - - `Activation map explain method <#activation-map-explain-method>`__ + - `Activation map XAI method <#activation-map-xai-method>`__ Installation Instructions ~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -110,9 +110,11 @@ Install requirements import platform # Install openvino package - %pip install -q "openvino>=2024.2.0" opencv-python tqdm + %pip install -q "openvino>=2024.2.0" opencv-python tqdm scipy - %pip install -q --no-deps "openvino-xai>=1.0.0" + %pip install -q --no-deps "openvino-xai>=1.1.0" + %pip install -q -U "numpy==1.*" + %pip install -q scipy if platform.system() != "Windows": %pip install -q "matplotlib>=3.4" @@ -175,11 +177,10 @@ converted to IR model from OpenVINO storage. else: print(f"{model_name} already downloaded to {base_artifacts_dir}") +.. code:: ipython3 -.. parsed-literal:: - - v3-small_224_1.0_float already downloaded to artifacts - + # Create ov.Model + model = ov.Core().read_model(model_xml_path) Load the Image ~~~~~~~~~~~~~~ @@ -196,7 +197,7 @@ Load the Image # The MobileNet model expects images in RGB format. image = cv2.cvtColor(cv2.imread(filename=str(image_filename)), code=cv2.COLOR_BGR2RGB) - plt.imshow(image); + plt.imshow(image) .. parsed-literal:: @@ -205,7 +206,15 @@ Load the Image -.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_10_1.png + +.. parsed-literal:: + + + + + + +.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_11_2.png Preprocess image for MobileNet @@ -220,41 +229,41 @@ Preprocess image for MobileNet # Add batch dimension preprocessed_image = np.expand_dims(preprocessed_image, 0) -Basic usage: Auto mode explainer --------------------------------- +Basic usage: ``Explainer`` in ``AUTO`` mode +------------------------------------------- -The easiest way to run the explainer is to do it in Auto mode. Under the -hood of Auto mode, it will first try to run the ``White-Box`` mode. If -this fails, it will then run the ``Black-Box`` mode. 
See more details -about `White-Box <#white-box-explainer>`__ and -`Black-Box <#black-box-explainer>`__ modes below. +The easiest way to generate saliency maps is to use ``Explainer`` in +``ExplainMode.AUTO`` mode (``AUTO`` mode is used by default). -| Generating saliency maps involves model inference. The explainer will - perform model inference, but to do so, it requires ``preprocess_fn`` - and ``postprocess_fn``. -| At this stage, we can avoid passing ``preprocess_fn`` by preprocessing - the data beforehand (e.g., resizing and adding a batch dimension as - shown above). We also don’t pass ``postprocess_fn`` here for - simplicity, since the White-Box mode doesn’t fail on the example - model. +Under the hood of ``AUTO`` mode, ``Explainer`` will first try to run the +``WHITEBOX`` mode. If ``WHITEBOX`` fails, it will then run the +``BLACKBOX`` mode as a fallback option. See more details about +`WHITEBOX <#explainer-in-whitebox-mode>`__ and +`BLACKBOX <#explainer-in-blackbox-mode>`__ modes below. -To learn more about pre- and post-process functions, refer to the `Pre- +Generating saliency maps involves model inference. The explainer will +perform model inference, but to do so, it requires ``preprocess_fn`` and +``postprocess_fn``. We can avoid passing ``preprocess_fn`` by +preprocessing (e.g., resizing and adding a batch dimension as shown +above) the input data beforehand - by default, ``preprocess_fn`` is the +identity function. We expect that current example will successfully use +``WHITEBOX`` mode under the hood, therefore we don’t pass +``postprocess_fn`` (``postprocess_fn`` is not required for ``WHITEBOX`` +mode, only for ``BLACKBOX``). + +To learn more about pre- and post-process functions, refer to the `pre- and post-process functions <#pre--and-post-process-functions>`__ section. -Create Explainer -~~~~~~~~~~~~~~~~ +Create ``Explainer`` object +~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 - # Create ov.Model - model = ov.Core().read_model(model_xml_path) - - # Create explainer object explainer = xai.Explainer( model=model, task=xai.Task.CLASSIFICATION, @@ -269,27 +278,27 @@ Create Explainer INFO:openvino_xai:Explaining the model in white-box mode. -Do explanation -~~~~~~~~~~~~~~ +Generate ``explanation`` +~~~~~~~~~~~~~~~~~~~~~~~~ -The predicted label for this image is ``flat-coated_retriever`` with -label index ``206``. So here and further we will check saliency maps for -this index. +The predicted class for this model-image pair is +``flat-coated_retriever`` with class index ``206``. So here and further +we will check saliency maps for this index. .. code:: ipython3 - # You can choose classes to generate saliency maps for. - # In this notebook we will check maps for predicted class 206 - flat-coated retriever + # You can choose class(es) to generate saliency maps for. + # In this notebook we will check maps for predicted class with index 206 - "flat-coated retriever" retriever_class_index = 206 .. code:: ipython3 explanation = explainer( preprocessed_image, - targets=retriever_class_index, - overlay=True, # False by default + targets=retriever_class_index, # can be a single target or a container of targets + overlay=True, # saliency map overlay over the original image, False by default, set to True for better visual inspection ) Visualize saliency maps @@ -300,16 +309,14 @@ Visualize saliency maps .. code:: ipython3 explanation: Explanation - # Dict[int: np.ndarray] where key - class id, value - processed saliency map e.g. 
354x500x3 - explanation.saliency_map + # explanation.saliency_map: Dict[int: np.ndarray] # where key - class id, value - processed saliency map (e.g. 354 x 500 x 3 shape) # Check saved saliency maps print(f"Saliency maps were generated for the following classes: {explanation.targets}") print(f"Saliency map size: {explanation.shape}") - # Show saliency maps for retriever class - retriever_sal_map = explanation.saliency_map[retriever_class_index] - plt.imshow(retriever_sal_map); + # Visualize generated saliency maps for each target class (.plot() supports plotting multiple saliency maps) + explanation.plot() .. parsed-literal:: @@ -319,7 +326,7 @@ Visualize saliency maps -.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_21_1.png +.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_22_1.png Save saliency maps @@ -330,38 +337,48 @@ Save saliency maps .. code:: ipython3 # Save saliency map - output = base_artifacts_dir / "explain_auto" - explanation.save(output) + explanation.save(base_artifacts_dir, "explain_auto_") .. code:: ipython3 - # See saved saliency maps - image_sal_map = cv2.imread(f"{output}/target_{retriever_class_index}.jpg") + # Plot saved saliency map + image_sal_map = cv2.imread(f"{base_artifacts_dir}/explain_auto_{retriever_class_index}.jpg") image_sal_map = cv2.cvtColor(image_sal_map, cv2.COLOR_BGR2RGB) - plt.imshow(image_sal_map); + plt.imshow(image_sal_map) + + + + +.. parsed-literal:: + -.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_24_0.png -Return saliency maps for all classes -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_25_1.png +Generate saliency maps for all classes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +To obtain saliency maps for all classes, set ``targets`` to ``None`` or +``-1``. .. code:: ipython3 explanation = explainer(preprocessed_image, targets=-1) # Check saved saliency maps - print(f"Saliency maps were generated for the following classes: {explanation.targets}") + print(f"Saliency maps were generated for the following classes: {explanation.targets[:5]} ... {explanation.targets[-5:]}") print(f"Saliency map size: {explanation.shape}") .. 
parsed-literal:: - Saliency maps were generated for the following classes: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 
717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000] + Saliency maps were generated for the following classes: [0, 1, 2, 3, 4] ... [996, 997, 998, 999, 1000] Saliency map size: (224, 224, 3) @@ -379,19 +396,19 @@ constructor. By default, ``preprocess_fn`` is an identity function that passes the input without any changes, assuming it is preprocessed beforehand. -In Auto mode, the explainer tries to run the White-Box mode first. If it -fails, the corresponding exception will be raised, and the Black-Box -mode will be enabled as a fallback. +In ``AUTO`` mode, the explainer tries to run the ``WHITEBOX`` mode +first. If it fails, the corresponding exception will be raised, and the +``BLACKBOX`` mode will be enabled as a fallback. -The Black-Box mode requires access to the output ``logits`` (activated -or not). Therefore, in such cases, ``postprocess_fn`` is required, which -accepts the raw IR model output and returns logits (see below for a -reference). +The ``BLACKBOX`` mode requires access to the output ``logits`` +(activated or not). Therefore, in such cases, ``postprocess_fn`` is +required, which accepts the raw IR model output and returns ``logits`` +(see below for a reference). .. code:: ipython3 def preprocess_fn(x: np.ndarray) -> np.ndarray: - # Implementing own pre-process function based on model's implementation + # Implementing pre-processing based on model's pipeline x = cv2.resize(src=x, dsize=(224, 224)) # Add batch dimension @@ -400,7 +417,7 @@ reference). 
def postprocess_fn(x: OVDict): - # Implementing own post-process function based on model's implementation + # Implementing post-processing function based on model's pipeline # Return "logits" model output return x[0] @@ -447,7 +464,7 @@ Visualization Parameters # Create explainer object explainer = xai.Explainer(model=model, task=xai.Task.CLASSIFICATION) - # Return overlayed image + # Generate overlayed saliency_map explanation = explainer( preprocessed_image, targets=[retriever_class_index], # target can be a single label index, label name or a list of indices/names @@ -455,12 +472,10 @@ Visualization Parameters original_input_image=image, # to apply overlay on the original image instead of preprocessed one that was used for the explainer ) - retriever_sal_map = explanation.saliency_map[retriever_class_index] - plt.imshow(retriever_sal_map) + explanation.plot() # Save saliency map - output = base_artifacts_dir / "overlay" - explanation.save(output) + explanation.save(base_artifacts_dir, "overlay_") .. parsed-literal:: @@ -472,33 +487,31 @@ Visualization Parameters -.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_32_1.png +.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_34_1.png .. code:: ipython3 - # Return low-resolution saliency map + # Generate saliency map without overlay over original image explanation = explainer( preprocessed_image, targets=[retriever_class_index], # target can be a single label index, label name or a list of indices/names overlay=False, # False by default ) - retriever_sal_map = explanation.saliency_map[retriever_class_index] - plt.imshow(retriever_sal_map) + explanation.plot() # Save saliency map - output = base_artifacts_dir / "colormap" - explanation.save(output) + explanation.save(base_artifacts_dir, "colormap_") -.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_33_0.png +.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_35_0.png .. code:: ipython3 - # Return low-resolution gray-scale saliency map + # Return low-resolution (raw) gray-scale saliency map explanation = explainer( preprocessed_image, targets=[retriever_class_index], # target can be a single label index, label name or a list of indices/names @@ -506,37 +519,37 @@ Visualization Parameters colormap=False, # True by default ) - retriever_sal_map = explanation.saliency_map[retriever_class_index] - plt.imshow(retriever_sal_map, cmap="gray") + explanation.plot() # Save saliency map - output = base_artifacts_dir / "grayscale" - explanation.save(output) + explanation.save(base_artifacts_dir, "grayscale_") -.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_34_0.png +.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_36_0.png -White-Box explainer -------------------- +``Explainer`` in ``WHITEBOX`` mode +---------------------------------- -ReciproCAM explain method +``ReciproCAM`` XAI method ~~~~~~~~~~~~~~~~~~~~~~~~~ -The White-Box explainer treats the model as a white box and needs to -make inner modifications. It adds extra XAI nodes after the backbone to -estimate which activations are important for model prediction. +``Explainer`` in ``WHITEBOX`` mode treats the model as a white box and +performs its inner modifications. 
``Explainer`` inserts extra XAI nodes +after the backbone to estimate which activations are important for model +prediction. If a method is not specified, the XAI branch will be generated using the `ReciproCAM `__ method. By default, the insertion of the XAI branch will be done automatically -by searching for the correct node. +by searching for the correct node - ``target_layer`` (``target_layer`` +can be specified manually). It works quickly and precisely, requiring only one model inference. @@ -547,10 +560,8 @@ It works quickly and precisely, requiring only one model inference. model=model, task=xai.Task.CLASSIFICATION, preprocess_fn=preprocess_fn, - # defaults to ExplainMode.AUTO - explain_mode=ExplainMode.WHITEBOX, - # ReciproCAM is the default XAI method for CNNs - explain_method=xai.Method.RECIPROCAM, + explain_mode=ExplainMode.WHITEBOX, # defaults to ExplainMode.AUTO + explain_method=xai.Method.RECIPROCAM, # ReciproCAM is the default white-box method for CNNs ) @@ -586,7 +597,7 @@ environment. model, task=xai.Task.CLASSIFICATION, explain_method=xai.Method.RECIPROCAM, - target_layer="MobilenetV3/Conv_1/Conv2D", # MobileNet V3 + target_layer="MobilenetV3/Conv_1/Conv2D", # optional, by default insert_xai will try to find target_layer automatically embed_scaling=True, ) @@ -598,23 +609,28 @@ environment. INFO:openvino_xai:Insertion of the XAI branch into the model was successful. +**Note**: ``insert_xai`` supports both OpenVINO IR and PyTorch models. +See documentation for more details. + Insertion-related parameters ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If automatic search for correct node fails, you can set up a correct -node manually with ``target_layer`` argument. For classification it’s +node manually with ``target_layer`` argument. For classification, it’s the last backbone node with shape [1, num_channels, feature_map_height, -feature_map_width]. For example, for MobileNetV3 it will be +feature_map_width]. For example, for the used MobileNetV3 it will be ``MobilenetV3/Conv_1/Conv2D`` layer with [1, 576, 7, 7] output shape. To find the right ``target_layer`` for your model, check the name of the -last convolutional layer in the backbone using ``.XML`` model. +last convolutional node in the backbone using ``.XML`` file (optionally, +use some graph visualization tool, such as Netron). ``embed_scaling`` **default True** (for speed purposes), this parameter -adds normalization to the XAI branch, which results in being able to -visualize saliency maps right away without further postprocessing. +ensures that saliency map scaling is embedded into the graph, which +results in being able to visualize saliency maps right away without +further postprocessing. .. code:: ipython3 @@ -638,25 +654,32 @@ visualize saliency maps right away without further postprocessing. INFO:openvino_xai:Explaining the model in white-box mode. -Black-Box explainer -------------------- - +``Explainer`` in ``BLACKBOX`` mode +---------------------------------- -The Black-Box method treats the model as a black box without altering -its structure. Therefore, this method will work on any model that can be -inferred and return class probabilities as output. -The `RISE `__ algorithm used in -Black-Box mode applies random masks to hide parts of the image, -retrieves the resulting class probabilities, and uses this information -to calculate the “importance” of each part of the image for the final -results. After performing thousands of inferences, a summarized saliency -map is generated. 
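As a rough, library-independent illustration of this random-masking idea (a sketch only, not the openvino-xai implementation — ``model_score``, ``num_masks``, ``cells``, and ``p_keep`` are placeholders; ``model_score`` stands for running the classifier on a masked image and reading the target-class probability):

.. code:: ipython3

    import cv2
    import numpy as np


    def rise_style_saliency(image, model_score, num_masks=1000, cells=7, p_keep=0.5):
        # Weight each random mask by the score the masked image receives,
        # then accumulate the weighted masks into a saliency map.
        h, w = image.shape[:2]
        saliency = np.zeros((h, w), dtype=np.float32)
        for _ in range(num_masks):
            # Low-resolution random grid, upsampled to image size for smooth masks.
            grid = (np.random.rand(cells, cells) < p_keep).astype(np.float32)
            mask = cv2.resize(grid, (w, h), interpolation=cv2.INTER_LINEAR)
            score = model_score(image * mask[..., None])  # target-class probability
            saliency += score * mask
        return saliency / (num_masks * p_keep)

In practice the library handles mask generation, batched inference, and normalization internally; the snippet is only meant to build intuition for the black-box methods used below.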
+``Explainer`` in ``BLACKBOX`` mode treats the model as a black box +without altering its internal structure. Therefore, this method will +work on any model that can be inferred and return class scores as +output. While it is convenient to treat every model as a black box for -explanation purposes, this algorithm may require a large number of -inferences (defaulting to 5000) to generate a high-quality saliency map. +explanation purposes, black-box method may require a significant number +of inferences (AISE requires 120-500 model inferences). + +Given that the quality of the saliency maps usually correlates with the +number of available inferences, we propose the following presets for the +black-box methods: ``Preset.SPEED``, ``Preset.BALANCE``, +``Preset.QUALITY`` (``Preset.BALANCE`` is used by default). + +AISE (Adaptive Input Sampling for Explanation of Black-box Models) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +AISE is used as a default black-box method. AISE formulates saliency map +generation as a kernel density estimation (KDE) problem, and adaptively +sample input masks using a derivative-free optimizer to maximize mask +saliency score. .. code:: ipython3 @@ -673,45 +696,69 @@ inferences (defaulting to 5000) to generate a high-quality saliency map. explanation = explainer( image, targets=retriever_class_index, - # targets=-1, # Explain all classes overlay=True, - num_masks=1000, # kwargs of the RISE algo ) .. parsed-literal:: INFO:openvino_xai:Explaining the model in black-box mode. - Explaining in synchronous mode: 100%|██████████| 1000/1000 [00:03<00:00, 259.73it/s] .. code:: ipython3 + # Plot saliency map + explanation.plot() + # Save saliency map - output = base_artifacts_dir / "blackbox_explain" - explanation.save(output) + explanation.save(base_artifacts_dir, "blackbox_aise_") + + + +.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_49_0.png + + +RISE (Randomized Input Sampling for Explanation of Black-box Models) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +`RISE `__ probes a model by +sub-sampling the input image via random masks and records its response +to each of them. RISE creates random masks from down-scaled space +(e.g. 7×7 grid) and adds random translation shifts for the pixel-level +explanation with further up-sampling. Weighted sum of all sampled masks +used to generate the fine-grained saliency map. + +.. code:: ipython3 + + # Create explainer object + explainer = xai.Explainer( + model=model, + task=xai.Task.CLASSIFICATION, + preprocess_fn=preprocess_fn, + postprocess_fn=postprocess_fn, + explain_mode=ExplainMode.BLACKBOX, # defaults to AUTO + explain_method=xai.Method.RISE, # xai.Method.AISE is used by default + ) - # See saved saliency maps - image_sal_map = cv2.imread(f"{output}/target_{retriever_class_index}.jpg") - image_sal_map = cv2.cvtColor(image_sal_map, cv2.COLOR_BGR2RGB) - plt.imshow(image_sal_map); + # Generate explanation + explanation = explainer( + image, + targets=retriever_class_index, + overlay=True, + ) +.. code:: ipython3 + # Plot saliency map + explanation.plot() + + # Save saliency map + explanation.save(base_artifacts_dir, "blackbox_rise_") -.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_45_0.png -For the ``Black-Box explainer``, the number of masks and cells is -crucial for achieving good results. 
In the example above, it’s evident -that the number of masks was insufficient to create a high-quality map. +.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_52_0.png -Varying the ``num_cells`` and ``num_masks`` parameters can achieve -different goals: - To speed up the explanation, you can reduce the -number of ``num_masks``. However, this will decrease the quality of the -resulting saliency maps, making it suitable for large and focused -objects. - Increasing ``num_cells`` provides a more fine-grained result, -but it requires a larger ``num_masks`` to converge. This approach is -more effective for classes with complex shapes. Advanced -------- @@ -724,9 +771,8 @@ Import ImageNet label names and add them to saliency maps If ``label_names`` are not provided to the explainer call, the saved -saliency map will have the predicted class index, not the name. For -example, ``image_name_target_206.jpg`` instead of -``image_name_target_retriever.jpg``. +saliency map will have the predicted class index, not the label name. +For example, ``206.jpg`` instead of ``retriever.jpg``. To conveniently view label names in saliency maps, we provide ImageNet label names information to the explanation call. @@ -781,8 +827,8 @@ label names information to the explanation call. # Adding ImageNet label names. explanation = explainer( image, - # Return saliency maps for 2 named labels - targets=["flat-coated_retriever", "microwave"], # Also label indices [206, 652] are possible as target + # Return saliency maps for 2 named labels, possible if label_names is provided + targets=["flat-coated_retriever", "microwave"], # slso label indices [206, 652] are possible as target label_names=imagenet_labels, ) @@ -797,8 +843,7 @@ label names information to the explanation call. .. code:: ipython3 # Save saliency map - output = base_artifacts_dir / "label_names" - explanation.save(output) + explanation.save(base_artifacts_dir, "label_names_") Below in ``base_artifacts_dir / "label_names"`` you can see saved saliency maps with label name on it: @@ -806,18 +851,18 @@ saliency maps with label name on it: .. code:: ipython3 # See saliency mas saved in `output` with predicted label in image name - for file_name in output.glob("*"): + for file_name in base_artifacts_dir.glob("label_names_*"): print(file_name) .. parsed-literal:: - artifacts/label_names/target_microwave.jpg - artifacts/label_names/target_flat-coated_retriever.jpg + artifacts/label_names_microwave.jpg + artifacts/label_names_flat-coated_retriever.jpg -Activation map explain method -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Activation map XAI method +~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -840,11 +885,8 @@ saliency maps for each class, the activation map is returned as explain_method=xai.Method.ACTIVATIONMAP, ) - explanation = explainer(image, targets=-1, overlay=True) - activation_map = explanation.saliency_map["per_image_map"] - - plt.imshow(activation_map) - plt.show() + explanation = explainer(image, overlay=True) + explanation.plot() .. parsed-literal:: @@ -855,5 +897,5 @@ saliency maps for each class, the activation map is returned as -.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_57_1.png +.. 
image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_63_1.png diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_10_1.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_10_1.png deleted file mode 100644 index e5049a64f6dbef..00000000000000 --- a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_10_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e52cd26d8672300419d11ceda43b756f44961da4d0ed4ba1b907eb5223d4c546 -size 387941 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_11_2.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_11_2.png new file mode 100644 index 00000000000000..a8fc791b3e4c52 --- /dev/null +++ b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_11_2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55b1a955ec7a4a7394f905837b1a1686d3bb5130565eb9d4901eade821e6757c +size 387941 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_21_1.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_21_1.png deleted file mode 100644 index b1b2f91a1c2a7d..00000000000000 --- a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_21_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ca8b8d022b4ebab307caa62d5de9be5553ee84492bcc1709a8267a3aba8f2374 -size 237061 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_22_1.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_22_1.png new file mode 100644 index 00000000000000..8822fe615f6f19 --- /dev/null +++ b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_22_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83b40d8eb0d4a89c7c24a9cc676e3b4f298e5eabdb7a9d5a5604d4c8533dca7f +size 342254 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_24_0.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_24_0.png deleted file mode 100644 index 4e780c1f4cba88..00000000000000 --- a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_24_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:26893793cf6015fdf551d8554731e41aad9a436bfe2df0fcdbf469b20d25eb22 -size 233040 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_25_1.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_25_1.png new file mode 100644 index 00000000000000..125556adbb530c --- /dev/null +++ b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_25_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:354fa33f0a0e6becba16a5c65dde256282215e75a1e379500fd9e9d5fed7845e +size 235673 diff --git 
a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_32_1.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_32_1.png deleted file mode 100644 index 0f9120d6ab8b9d..00000000000000 --- a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_32_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2fd585e98c3b12072d9d2fd2b24f3e46e946f4a3771b10a7b987e3e126b218fb -size 336183 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_33_0.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_33_0.png deleted file mode 100644 index 541cb8c169552e..00000000000000 --- a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_33_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2ae15d3b00ae8e36c86ffce352d92f24c370ca2948b53c06bc2bb9a9d3e73356 -size 51371 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_34_0.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_34_0.png deleted file mode 100644 index f42dca24596405..00000000000000 --- a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_34_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:90c7bf014e9c04a3ed78cf6965ed82ae371fc7c4a18fd717190e1e5561f0dc0a -size 6162 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_34_1.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_34_1.png new file mode 100644 index 00000000000000..104fcabc090172 --- /dev/null +++ b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_34_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14efddd662af5ecae15f38d9fa20e001d7c1f1f26418d3a89ea0f489a5aee993 +size 312661 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_35_0.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_35_0.png new file mode 100644 index 00000000000000..60fdb91b059005 --- /dev/null +++ b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_35_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cae625a36aeb18c1fd5f5b7e673b7e58836f8bf914b43906cb4e5c81cb33885f +size 62966 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_36_0.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_36_0.png new file mode 100644 index 00000000000000..1c0f2dbbeb4f3a --- /dev/null +++ b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_36_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2596ddca7816d246bea422e0c9b809a41feaf65163d6c27c0b422ba6f16a440 +size 4947 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_45_0.png 
b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_45_0.png deleted file mode 100644 index 6ed4fcc4d48282..00000000000000 --- a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_45_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2fd21924f3fd3631e547d7b0254a148df812f50da4c784bdc4357ad8635e4cd7 -size 354123 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_49_0.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_49_0.png new file mode 100644 index 00000000000000..2c5b7a96ca9399 --- /dev/null +++ b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_49_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0d33ea52b17c068fb6e2bff4ad0d4f0993e38a6a074a3ac1af3ccaefec2199d +size 326076 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_52_0.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_52_0.png new file mode 100644 index 00000000000000..6fa958fe614823 --- /dev/null +++ b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_52_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d41ea03631f0daa7400793e57138b4af52e13dc5294a3440688dd27a5034215e +size 324115 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_57_1.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_57_1.png deleted file mode 100644 index 3f0c1df4b7e57c..00000000000000 --- a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_57_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4b654a427e48742a5d20a15cf89fb3c7667123ce981c6e719299b42931e696e0 -size 336612 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_63_1.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_63_1.png new file mode 100644 index 00000000000000..98255daa2893ee --- /dev/null +++ b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_63_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e60ccfb5582f57e813fbcd3ee55c35677f3cb391e7a4eb67b10319ee911f341 +size 314537 diff --git a/docs/notebooks/explainable-ai-3-map-interpretation-with-output.rst b/docs/notebooks/explainable-ai-3-map-interpretation-with-output.rst index 4a76c94a411d17..537ae36f6a331c 100644 --- a/docs/notebooks/explainable-ai-3-map-interpretation-with-output.rst +++ b/docs/notebooks/explainable-ai-3-map-interpretation-with-output.rst @@ -57,12 +57,11 @@ low-confident prediction, and wrong prediction. 
- `Select inference device <#select-inference-device>`__ - `Load the Model <#load-the-model>`__ - - `Define preprocess_fn and - postprocess_fn <#define-preprocess_fn-and-postprocess_fn>`__ + - `Define preprocess_fn <#define-preprocess_fn>`__ - `Explain <#explain>`__ - - `Create explainer <#create-explainer>`__ + - `Create Explainer object <#create-explainer-object>`__ - `Import ImageNet label names <#import-imagenet-label-names>`__ - `Explain using ImageNet labels <#explain-using-imagenet-labels>`__ @@ -110,7 +109,11 @@ Install requirements # Install openvino package %pip install -q "openvino>=2024.2.0" opencv-python tqdm - %pip install -q --no-deps "openvino-xai>=1.0.0" + + # Install openvino xai package + %pip install -q --no-deps "openvino-xai>=1.1.0" + %pip install -q -U "numpy==1.*" + %pip install -q scipy if platform.system() != "Windows": %pip install -q "matplotlib>=3.4" @@ -184,12 +187,6 @@ classify since they’re all dog breeds. image_folder_path = data_folder / "imagewoof320" / "imagewoof320" - -.. parsed-literal:: - - Dataset is already downloaded to artifacts and extracted. - - .. code:: ipython3 # Create list of images to explain @@ -236,12 +233,6 @@ scaling and normalization with certain values. else: print(f"{model_name} already downloaded to {base_artifacts_dir}") - -.. parsed-literal:: - - mobilenetv3_large_100.ra_in1k already downloaded to artifacts - - Prepare model to run inference ------------------------------ @@ -260,15 +251,6 @@ select device from dropdown list for running inference using OpenVINO device - - - -.. parsed-literal:: - - Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') - - - Load the Model ~~~~~~~~~~~~~~ @@ -281,17 +263,17 @@ Load the Model model = core.read_model(model=model_xml_path) compiled_model = core.compile_model(model=model, device_name=device.value) -Define preprocess_fn and postprocess_fn -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Define ``preprocess_fn`` +~~~~~~~~~~~~~~~~~~~~~~~~ -To run model inference, you need to define functions to preprocess data -and postprocess the results based on the model’s implementation. Since -the used model is originally from `timm -storage `__, we -need to apply specific timm preprocessing, including normalization and -scaling with certain values. +This notebook using ``WHITEBOX`` mode for model explanation - it is +required to define function to preprocess data (the alternative is to +preprocess input data). Since the used model is originally from `timm +storage `__, it is +required to apply specific timm preprocessing, including normalization +and scaling with certain values. .. code:: ipython3 @@ -312,40 +294,22 @@ scaling with certain values. # Add batch dimension x = np.expand_dims(x, 0) return x - - - def postprocess_fn(x: np.ndarray) -> np.ndarray: - """ - Process model prediction - """ - prediction_processed = softmax(x) - # Remove batch dimention - return prediction_processed[0] - - - def softmax(x): - """Compute softmax values of x.""" - e_x = np.exp(x - np.max(x)) - return e_x / e_x.sum() Explain ------- -Create explainer -~~~~~~~~~~~~~~~~ - - +Create ``Explainer`` object +~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The ``explainer`` can internally apply pre-processing during model -inference, allowing raw images as input. -To enable this, define ``preprocess_fn`` and provide it to the explainer -constructor. In cases where we pass multiple unprocessed images, as done -in this notebook, we need to define ``preprocess_fn``. 
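As a small illustration of the two options described above, the sketch below
shows the explainer call with and without ``preprocess_fn``. Here
``preprocess_image`` stands in for the timm-style preprocessing helper defined
in this notebook (the actual name may differ), and the target index 206
(flat-coated retriever) is arbitrary.

.. code:: ipython3

    # Option 1: pass preprocess_fn, so raw images can be fed directly
    explainer = xai.Explainer(
        model=model,
        task=xai.Task.CLASSIFICATION,
        preprocess_fn=preprocess_image,
    )
    explanation = explainer(image, targets=206)

    # Option 2: omit preprocess_fn and preprocess the input yourself;
    # the explainer then assumes the input is already preprocessed
    explainer_raw = xai.Explainer(model=model, task=xai.Task.CLASSIFICATION)
    explanation = explainer_raw(preprocess_image(image), targets=206)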
-If it’s not defined, it is assumed that the input is preprocessed. +The ``Explainer`` object can internally apply pre-processing during +model inference, allowing raw images as input. To enable this, define +``preprocess_fn`` and provide it to the explainer constructor. If +``preprocess_fn`` is not defined, it is assumed that the input is +preprocessed. .. code:: ipython3 @@ -375,11 +339,10 @@ Import ImageNet label names If ``label_names`` are not provided to the explainer call, the saved saliency map will have the predicted class index, not the name. For -example, ``image_name_target_167.jpg`` instead of -``image_name_target_English_foxhound.jpg``. +example, ``167.jpg`` instead of ``English_foxhound.jpg``. -To conveniently view label names in saliency maps, we provide ImageNet -label names information to the explanation call. +To conveniently view label names in saliency maps, we prepare and +provide ImageNet label names information to the explanation call. .. code:: ipython3 @@ -430,10 +393,10 @@ to the explainer. targets=[ "flat-coated_retriever", "Samoyed", - ], # Also label indices [206, 258] are possible as target + ], # also label indices [206, 258] are possible as target label_names=imagenet_labels, ) - explanation.save(output, Path(image_path).stem) + explanation.save(output, f"{Path(image_path).stem}_") # pass prefix name with underscore Below in ``base_artifacts_dir / "saliency_maps" / "multiple_images"`` you can see saved saliency maps: @@ -447,8 +410,8 @@ you can see saved saliency maps: .. parsed-literal:: - artifacts/saliency_maps/multiple_images/n02105641_2491_target_flat-coated_retriever.jpg - artifacts/saliency_maps/multiple_images/n02105641_2491_target_Samoyed.jpg + artifacts/saliency_maps/multiple_images/n02088364_5768_Samoyed.jpg + artifacts/saliency_maps/multiple_images/n02088364_5768_flat-coated_retriever.jpg Notable use cases in ImageWoof dataset @@ -544,6 +507,20 @@ The cell below contains paths to images with those respective use cases: print(f"Predicted class {imagenet_labels[index]}, index {index}, probability: {score:.2f}") return result_infer, result_idxs, result_scores + + + def postprocess_fn(x: np.ndarray) -> np.ndarray: + """ + Process model prediction + """ + prediction_processed = softmax(x) + return prediction_processed[0] # remove batch dimension + + + def softmax(x): + """Compute softmax values of x.""" + e_x = np.exp(x - np.max(x)) + return e_x / e_x.sum() Explain for each use case ~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -573,22 +550,22 @@ Explain for each use case explanation = explainer( image, - targets=result_idxs, # Return saliency maps for predicted classes + targets=result_idxs, # return saliency maps for predicted classes label_names=imagenet_labels, overlay=True, ) - # Save saliency maps, use detailed implementation instead of `explanation.save` - # to return predicted scores for saliency maps as well + saliency_map_name_prefix = f"{image_name}_{gt_info}_pr_" + saliency_map_name_postfix = "_" + confidence_scores = {} for idx, score in zip(result_idxs, result_scores): - target_name = imagenet_labels[idx] - cv2.imwrite( - os.path.join( - output / use_case, - f"{image_name}_{gt_info}_pr_{target_name}_{score:.2f}.jpg", - ), - img=explanation.saliency_map[idx], - ) + confidence_scores[idx] = score + explanation.save( + dir_path=(output / use_case), + prefix=saliency_map_name_prefix, + postfix=saliency_map_name_postfix, + confidence_scores=confidence_scores, + ) .. 
parsed-literal:: @@ -628,30 +605,30 @@ Explain for each use case True_positive_high_confidence - n02111889_17737_gt_Samoyed_0.94_pr_Samoyed_0.94 - n02099601_6505_gt_golden retriever_0.88_pr_golden_retriever_0.88 n02088364_2019_gt_beagle_0.97_pr_beagle_0.97 + n02099601_6505_gt_golden retriever_0.88_pr_golden_retriever_0.88 n02105641_817_gt_Old English sheepdog_0.96_pr_Old_English_sheepdog_0.96 + n02111889_17737_gt_Samoyed_0.94_pr_Samoyed_0.94 True_positive_low_confidence - n02086240_1422_gt_Shih-Tzu_0.18_pr_Shih-Tzu_0.18 - n02086240_3709_gt_Shih-Tzu_0.20_pr_Shih-Tzu_0.20 n02099601_7942_gt_golden retriever_0.18_pr_golden_retriever_0.18 + n02086240_3709_gt_Shih-Tzu_0.20_pr_Shih-Tzu_0.20 + n02086240_1422_gt_Shih-Tzu_0.18_pr_Shih-Tzu_0.18 n02086240_1765_gt_Shih-Tzu_0.18_pr_Shih-Tzu_0.18 False_positive_high_confidence n02088364_12304_gt_beagle_0.01_pr_car_mirror_0.82 - n02111889_14926_gt_Samoyed_0.03_pr_Arctic_fox_0.95 - n02111889_1931_gt_Samoyed_0.07_pr_dogsled_0.79 - n02115641_5752_gt_dingo_0.02_pr_Chihuahua_0.93 + n02088364_2430_gt_beagle_0.00_pr_bannister_0.78 n02099601_4933_gt_golden retriever_0.05_pr_bubble_0.79 n02096294_2323_gt_Australian terrier_0.00_pr_quilt_0.80 - n02088364_2430_gt_beagle_0.00_pr_bannister_0.78 + n02115641_5752_gt_dingo_0.02_pr_Chihuahua_0.93 + n02111889_1931_gt_Samoyed_0.07_pr_dogsled_0.79 n02087394_6357_gt_Rhodesian ridgeback_0.00_pr_dalmatian_0.98 + n02111889_14926_gt_Samoyed_0.03_pr_Arctic_fox_0.95 True_positive_two_predictions - n02111889_374_gt_Samoyed_0.43_pr_Samoyed_0.43 n02099601_634_gt_golden retriever_0.30_pr_golden_retriever_0.30 + n02111889_374_gt_Samoyed_0.43_pr_Samoyed_0.43 n02099601_634_gt_golden retriever_0.30_pr_Labrador_retriever_0.57 n02111889_374_gt_Samoyed_0.43_pr_crib_0.39 @@ -693,6 +670,7 @@ of pictures, their names, and the confidence of predictions: for image_path, ax in zip(image_paths, axs): image_sal_map = cv2.imread(f"{use_case_output_dir}/{image_path}") + image_sal_map = cv2.cvtColor(image_sal_map, cv2.COLOR_BGR2RGB) image_name = Path(image_path).stem image_name = image_name.replace("_target", "") diff --git a/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_50_0.png b/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_50_0.png index f149c148287d67..1a9f33c3368b17 100644 --- a/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_50_0.png +++ b/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_50_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ff5a5cd559b24fdfae9eb238438d8e1e65be9c5878d2d8a48551038a2175dd90 -size 935667 +oid sha256:cbb403d4ab869af3d0d82a3a7980192f2da68c7df2fe27ebd34340465592f46e +size 974504 diff --git a/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_53_0.png b/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_53_0.png index 281a37e1ff476e..62018be36e4c3a 100644 --- a/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_53_0.png +++ b/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_53_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:056291c27310ffbff98af36276ead5fa216a04e6f55507e0559a86e277783499 -size 893683 +oid sha256:ad8213f746e218068621812a53f4ec4337fb1d0f8fcc763566e5f9e4251cbcb2 +size 917046 diff --git a/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_56_0.png b/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_56_0.png index 0a23b9ac833d18..40b6043e87691d 100644 --- a/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_56_0.png +++ b/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_56_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5b05bbfa7b014ab362a42ba8cf97c4604fd486b43b76e365802681fb3c2678d6 -size 673321 +oid sha256:12061dc812cec3219863c5936441baa8ce5b86015acf978ca3d4dcf1212b9c02 +size 681815 diff --git a/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_59_0.png b/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_59_0.png index 5ca94c55195397..a04779fd42ed48 100644 --- a/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_59_0.png +++ b/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_59_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e61cbfbf2fbac5b143f72a31d5de6ecabb6c6e705995129966becbb09bfa0cec -size 715492 +oid sha256:3f48b01dde2edffcea65f08fafe13ff158314f7e75534bbc5d7af027cbc43f5e +size 746506 diff --git a/docs/notebooks/fast-segment-anything-with-output.rst b/docs/notebooks/fast-segment-anything-with-output.rst index 65bd0c194a8116..c29a58c33e3490 100644 --- a/docs/notebooks/fast-segment-anything-with-output.rst +++ b/docs/notebooks/fast-segment-anything-with-output.rst @@ -77,8 +77,8 @@ Install requirements .. code:: ipython3 - %pip install -q "ultralytics==8.2.24" "onnx<1.16.2" tqdm --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "openvino-dev>=2024.0.0" + %pip install -q "ultralytics==8.2.24" "matplotlib>=3.4" "onnx<1.16.2" tqdm --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "openvino>=2024.4.0" %pip install -q "nncf>=2.9.0" %pip install -q "gradio>=4.13" @@ -158,7 +158,7 @@ model and generate a segmentation map. .. parsed-literal:: - 100%|██████████| 138M/138M [00:02<00:00, 67.6MB/s] + 100%|██████████| 138M/138M [00:02<00:00, 56.5MB/s] @@ -170,8 +170,8 @@ model and generate a segmentation map. .. parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 768x1024 37 objects, 612.7ms - Speed: 3.0ms preprocess, 612.7ms inference, 794.5ms postprocess per image at shape (1, 3, 768, 1024) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 768x1024 37 objects, 662.7ms + Speed: 3.8ms preprocess, 662.7ms inference, 766.0ms postprocess per image at shape (1, 3, 768, 1024) The model returns segmentation maps for all the objects on the image. @@ -214,10 +214,10 @@ tracing. 
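For reference, the export to OpenVINO IR performed in this notebook can be
sketched with the ultralytics API as follows. This is only a sketch, assuming
ultralytics 8.2.x with the ``FastSAM-x.pt`` checkpoint available locally; the
argument values mirror the export logs shown around it.

.. code:: ipython3

    from ultralytics import FastSAM

    # Load the PyTorch checkpoint and export it to OpenVINO IR via tracing
    pt_model = FastSAM("FastSAM-x.pt")
    exported_dir = pt_model.export(format="openvino", imgsz=1024, half=False)
    print(exported_dir)  # e.g. "FastSAM-x_openvino_model/"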
The FastSAM model itself is based on YOLOv8 model. PyTorch: starting from 'FastSAM-x.pt' with input shape (1, 3, 1024, 1024) BCHW and output shape(s) ((1, 37, 21504), (1, 32, 256, 256)) (138.3 MB) OpenVINO: starting export with openvino 2024.4.0-16579-c3152d32c9c-releases/2024/4... - OpenVINO: export success ✅ 6.0s, saved as 'FastSAM-x_openvino_model/' (276.1 MB) + OpenVINO: export success ✅ 6.2s, saved as 'FastSAM-x_openvino_model/' (276.1 MB) - Export complete (9.0s) - Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything + Export complete (9.2s) + Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything Predict: yolo predict task=segment model=FastSAM-x_openvino_model imgsz=1024 Validate: yolo val task=segment model=FastSAM-x_openvino_model imgsz=1024 data=ultralytics/datasets/sa.yaml Visualize: https://netron.app @@ -321,8 +321,8 @@ pipeline. .. parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 1024x1024 42 objects, 498.1ms - Speed: 5.7ms preprocess, 498.1ms inference, 31.2ms postprocess per image at shape (1, 3, 1024, 1024) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 1024x1024 42 objects, 558.8ms + Speed: 5.7ms preprocess, 558.8ms inference, 34.6ms postprocess per image at shape (1, 3, 1024, 1024) One can observe the converted model outputs in the next cell, they is @@ -615,8 +615,8 @@ calibration dataset to measure the performance. .. parsed-literal:: - Segmented in 70 seconds. - Resulting in 1.83 fps + Segmented in 69 seconds. + Resulting in 1.86 fps .. code:: ipython3 @@ -643,9 +643,9 @@ calibration dataset to measure the performance. .. parsed-literal:: - Segmented in 21 seconds - Resulting in 6.1 fps - That is 3.33 times faster! + Segmented in 23 seconds + Resulting in 5.57 fps + That is 3.0 times faster! Try out the converted pipeline diff --git a/docs/notebooks/florence2-with-output.rst b/docs/notebooks/florence2-with-output.rst index a09f2a1ea60399..4cca2d85a2bd6c 100644 --- a/docs/notebooks/florence2-with-output.rst +++ b/docs/notebooks/florence2-with-output.rst @@ -51,20 +51,12 @@ Prerequisites .. code:: ipython3 - import platform - - %pip install -q "openvino>=2024.3.0" "einops" "torch>2.1" "torchvision" "timm>=0.9.8" "transformers>=4.41" "pillow" "gradio>=4.19" --extra-index-url https://download.pytorch.org/whl/cpu - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "openvino>=2024.3.0" "einops" "torch>2.1" "torchvision" "matplotlib>=3.4" "timm>=0.9.8" "transformers>=4.41" "pillow" "gradio>=4.19" --extra-index-url https://download.pytorch.org/whl/cpu .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. .. code:: ipython3 @@ -108,10 +100,10 @@ available model. By default, we will use .. parsed-literal:: - 2024-10-08 02:10:50.200273: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. 
You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 02:10:50.234398: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-23 01:17:38.434215: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-23 01:17:38.467940: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 02:10:50.883345: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-23 01:17:39.118965: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -201,19 +193,19 @@ pipeline. .. parsed-literal:: - config.json: 0%| | 0.00/2.43k [00:00 1 or self.sliding_window is not None: /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/chkpt/modeling_florence2.py:1205: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! is_causal = True if self.is_causal and attention_mask is None and tgt_len > 1 else False diff --git a/docs/notebooks/flux.1-image-generation-with-output.rst b/docs/notebooks/flux.1-image-generation-with-output.rst index 09a05c0e73bf8d..62549bd074d4a6 100644 --- a/docs/notebooks/flux.1-image-generation-with-output.rst +++ b/docs/notebooks/flux.1-image-generation-with-output.rst @@ -51,14 +51,6 @@ Prerequisites %pip install -q "sentencepiece" "protobuf" %pip install -qU "openvino>=2024.4.0" - -.. parsed-literal:: - - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - - .. code:: ipython3 import requests @@ -116,11 +108,17 @@ FLUX.1-dev version using widget bellow. .. parsed-literal:: - 2024-10-08 02:11:42.908018: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 02:11:42.941481: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 02:11:43.614104: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino + + +.. 
parsed-literal:: + 2024-08-13 17:30:13.543036: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-08-13 17:30:13.544738: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used. + 2024-08-13 17:30:13.579013: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2024-08-13 17:30:14.449873: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + @@ -187,71 +185,10 @@ and convert each part of pipeline using ``ov.convert_model``. model_dir = convert_flux(model_selector.value) - -.. parsed-literal:: - - Loading pipeline components...: 0%| | 0/7 [00:00 1 or self.sliding_window is not None: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if past_key_values_length > 0: - - -.. parsed-literal:: - - ✅ Clip Text encoder conversion finished - ⌛ T5 Text encoder conversion started - ✅ T5 Text encoder conversion finished - ⌛ VAE decoder conversion started - - .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if hidden_states.shape[0] >= 64: - - -.. parsed-literal:: - - ✅ VAE decoder onversion finished - ✅ black-forest-labs/FLUX.1-schnell successfully converted and can be found in FLUX.1-schnell - + ✅ black-forest-labs/FLUX.1-schnell model already converted and can be found in FLUX.1-schnell + .. code:: ipython3 @@ -331,136 +268,11 @@ compression. .. parsed-literal:: - INFO:nncf:NNCF initialized successfully. 
Supported frameworks detected: torch, tensorflow, onnx, openvino - ⌛ transformer compression started - Compression parameters: - mode = int4_sym - ratio = 1.0 - group_size = 64 - INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 0% (1 / 502) │ 0% (0 / 501) │ - ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ 4 │ 100% (501 / 502) │ 100% (501 / 501) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ - - - -.. parsed-literal:: - - Output() - - - - - - - - - -.. parsed-literal:: - - ✅ transformer compression finished Compressed transformer can be found in FLUX.1-schnell/transformer/transformer_int4.xml - ⌛ text_encoder compression started - Compression parameters: - mode = int4_sym - ratio = 1.0 - group_size = 64 - INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 33% (3 / 74) │ 0% (0 / 71) │ - ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ 4 │ 67% (71 / 74) │ 100% (71 / 71) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ - - - -.. parsed-literal:: - - Output() - - - - - - - - - -.. parsed-literal:: - - ✅ text_encoder compression finished Compressed text_encoder can be found in FLUX.1-schnell/text_encoder/text_encoder_int4.xml - ⌛ text_encoder_2 compression started - Compression parameters: - mode = int4_sym - ratio = 1.0 - group_size = 64 - INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 4% (3 / 170) │ 0% (0 / 167) │ - ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ 4 │ 96% (167 / 170) │ 100% (167 / 167) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ - - - -.. parsed-literal:: - - Output() - - - - - - - - - -.. parsed-literal:: - - ✅ text_encoder_2 compression finished Compressed text_encoder_2 can be found in FLUX.1-schnell/text_encoder_2/text_encoder_2_int4.xml - ⌛ vae_decoder compression started - Compression parameters: - mode = int4_sym - ratio = 1.0 - group_size = 64 - INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 98% (36 / 39) │ 0% (0 / 3) │ - ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ 4 │ 2% (3 / 39) │ 100% (3 / 3) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ - - - -.. 
parsed-literal:: - - Output() - - - - - - - - - -.. parsed-literal:: - - ✅ vae_decoder compression finished Compressed vae_decoder can be found in FLUX.1-schnell/vae/vae_decoder_int4.xml - + Run OpenVINO model inference ---------------------------- @@ -506,7 +318,7 @@ model and inference device as arguments. .. parsed-literal:: - Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU') + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') @@ -521,8 +333,17 @@ model and inference device as arguments. ✅ transformer - Done! ✅ text_encoder - Done! ✅ text_encoder_2 - Done! - ✅ vae - Done! + +.. parsed-literal:: + + You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers + + +.. parsed-literal:: + + ✅ vae - Done! + .. code:: ipython3 @@ -565,20 +386,6 @@ Interactive demo # demo.launch(share=True) # it creates a publicly shareable link for the interface. Read more in the docs: https://gradio.app/docs/ try: - demo.launch(debug=False) + demo.launch(debug=True) except Exception: - demo.launch(debug=False, share=True) - - -.. parsed-literal:: - - Running on local URL: http://127.0.0.1:7860 - - To create a public link, set `share=True` in `launch()`. - - - - - - - + demo.launch(debug=True, share=True) diff --git a/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_20_1.jpg b/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_20_1.jpg index d4dcc2dcc6d8b2..7223074e225dd8 100644 --- a/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_20_1.jpg +++ b/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_20_1.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8160a0e39a306d7337c724b69f447f1b3aca95eaf93de866194e65678c0d01ba -size 14210 +oid sha256:8923d4a7ce04ed66bb58c28ab4450594a0340b97a15245fc594e669d33b574ef +size 14369 diff --git a/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_20_1.png b/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_20_1.png index b7d17c76ea39b4..83b60f80d75c13 100644 --- a/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_20_1.png +++ b/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_20_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:81d23cd276ecf3af1eda5b11e80510737046dd245e3d295965a23f596f4b99f5 -size 115703 +oid sha256:fab1fa5bf3f71d6bb6f505658da6765d37295b7b9618560d5f7c0c344e2a3896 +size 116250 diff --git a/docs/notebooks/freevc-voice-conversion-with-output.rst b/docs/notebooks/freevc-voice-conversion-with-output.rst index 73db954afdfbbb..a3c914a44b36a7 100644 --- a/docs/notebooks/freevc-voice-conversion-with-output.rst +++ b/docs/notebooks/freevc-voice-conversion-with-output.rst @@ -104,7 +104,7 @@ Check if FreeVC is installed and append its path to ``sys.path`` remote: Counting objects: 100% (74/74), done. remote: Compressing objects: 100% (47/47), done. remote: Total 131 (delta 43), reused 27 (delta 27), pack-reused 57 (from 1) - Receiving objects: 100% (131/131), 15.28 MiB | 17.35 MiB/s, done. + Receiving objects: 100% (131/131), 15.28 MiB | 3.81 MiB/s, done. Resolving deltas: 100% (43/43), done. @@ -134,8 +134,8 @@ Check if FreeVC is installed and append its path to ``sys.path`` Downloading... 
From: https://drive.google.com/uc?id=12-cB34qCTvByWT-QtOcZaqwwO21FLSqU&confirm=t&uuid=a703c43c-ccce-436c-8799-c11b88e9e7e4 - To: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/WavLM-Large.pt - 100%|██████████| 1.26G/1.26G [00:28<00:00, 44.0MB/s] + To: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/WavLM-Large.pt + 100%|██████████| 1.26G/1.26G [00:26<00:00, 48.4MB/s] .. code:: ipython3 @@ -239,13 +239,13 @@ Models initialization .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm. warnings.warn("torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.") .. parsed-literal:: - Loaded the voice encoder model on cpu in 0.00 seconds. + Loaded the voice encoder model on cpu in 0.01 seconds. Reading dataset settings @@ -288,7 +288,7 @@ Inference .. parsed-literal:: - 2it [00:03, 1.90s/it] + 2it [00:03, 1.72s/it] Result audio files should be available in ‘outputs/freevc’ @@ -360,13 +360,13 @@ Converting to OpenVINO’s IR format. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:495: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:495: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert embed_dim == self.embed_dim - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:496: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:496: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! assert list(query.size()) == [tgt_len, bsz, embed_dim] - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:500: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:500: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert key_bsz == bsz - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:502: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:502: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert src_len, bsz == value.shape[:2] @@ -581,12 +581,12 @@ function to OpenVINO IR format. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1102: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1102: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: Tensor-likes are not close! - Mismatched elements: 25912 / 25920 (100.0%) - Greatest absolute difference: 1.0370872616767883 at index (0, 0, 18175) (up to 1e-05 allowed) - Greatest relative difference: 8656.24884080371 at index (0, 0, 11526) (up to 1e-05 allowed) + Mismatched elements: 25909 / 25920 (100.0%) + Greatest absolute difference: 0.713585376739502 at index (0, 0, 4783) (up to 1e-05 allowed) + Greatest relative difference: 22806.258007743752 at index (0, 0, 19024) (up to 1e-05 allowed) _check_trace( @@ -645,7 +645,7 @@ And now we can check inference using only IR models. .. 
parsed-literal:: - 2it [00:01, 1.29it/s] + 2it [00:01, 1.30it/s] Result audio files should be available in ‘outputs/freevc’ and you can @@ -707,7 +707,7 @@ Result audio: diff --git a/docs/notebooks/grounded-segment-anything-with-output.rst b/docs/notebooks/grounded-segment-anything-with-output.rst index ee6741ff7af4e7..51522e791e04a4 100644 --- a/docs/notebooks/grounded-segment-anything-with-output.rst +++ b/docs/notebooks/grounded-segment-anything-with-output.rst @@ -124,16 +124,16 @@ segmentation you can select vanilla ``SAM``. Cloning into 'GroundingDINO'... remote: Enumerating objects: 379, done. remote: Counting objects: 100% (190/190), done. - remote: Compressing objects: 100% (80/80), done. - remote: Total 379 (delta 135), reused 110 (delta 110), pack-reused 189 (from 1) - Receiving objects: 100% (379/379), 14.03 MiB | 19.90 MiB/s, done. + remote: Compressing objects: 100% (81/81), done. + remote: Total 379 (delta 135), reused 109 (delta 109), pack-reused 189 (from 1) + Receiving objects: 100% (379/379), 14.03 MiB | 18.28 MiB/s, done. Resolving deltas: 100% (194/194), done. Cloning into 'EfficientSAM'... remote: Enumerating objects: 424, done. remote: Counting objects: 100% (85/85), done. remote: Compressing objects: 100% (33/33), done. remote: Total 424 (delta 76), reused 52 (delta 52), pack-reused 339 (from 1) - Receiving objects: 100% (424/424), 262.14 MiB | 25.51 MiB/s, done. + Receiving objects: 100% (424/424), 262.14 MiB | 30.07 MiB/s, done. Resolving deltas: 100% (246/246), done. @@ -222,6 +222,7 @@ GroundingDINO imports .. parsed-literal:: + FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers UserWarning: Failed to load custom C++ ops. Running on CPU mode Only! @@ -506,10 +507,10 @@ class, but the inference will be done using OpenVINO optimized model. .. parsed-literal:: - 2024-10-08 02:28:09.725059: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 02:28:09.764729: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-23 01:22:04.926963: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-23 01:22:04.966234: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
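Ahead of the box-conversion step described just below, here is a hedged sketch
of turning GroundingDINO-style normalized ``cxcywh`` boxes into the pixel
``xyxy`` format expected by ``supervision``. The function name and input
shapes are assumptions made for illustration; the notebook's own helper may
differ.

.. code:: ipython3

    import numpy as np
    import supervision as sv


    def boxes_to_detections(boxes_cxcywh: np.ndarray, image_h: int, image_w: int) -> sv.Detections:
        # Scale normalized boxes to pixel coordinates
        boxes = boxes_cxcywh * np.array([image_w, image_h, image_w, image_h], dtype=np.float32)
        xyxy = np.column_stack(
            [
                boxes[:, 0] - boxes[:, 2] / 2,  # x_min
                boxes[:, 1] - boxes[:, 3] / 2,  # y_min
                boxes[:, 0] + boxes[:, 2] / 2,  # x_max
                boxes[:, 1] + boxes[:, 3] / 2,  # y_max
            ]
        )
        return sv.Detections(xyxy=xyxy)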
- 2024-10-08 02:28:10.354526: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-23 01:22:05.582957: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT Convert predicted boxes to supervision box detections format diff --git a/docs/notebooks/handwritten-ocr-with-output.rst b/docs/notebooks/handwritten-ocr-with-output.rst index 9f03d782ed2235..a66f73f07d99b4 100644 --- a/docs/notebooks/handwritten-ocr-with-output.rst +++ b/docs/notebooks/handwritten-ocr-with-output.rst @@ -49,21 +49,13 @@ Guide =2023.1.0" opencv-python tqdm - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + # Install openvino package + %pip install -q "openvino>=2023.1.0" opencv-python tqdm "matplotlib>=3.4" .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. Imports diff --git a/docs/notebooks/hello-detection-with-output.rst b/docs/notebooks/hello-detection-with-output.rst index 85dff2edc5cb31..d9293f8da61279 100644 --- a/docs/notebooks/hello-detection-with-output.rst +++ b/docs/notebooks/hello-detection-with-output.rst @@ -41,7 +41,7 @@ Guide =2023.1.0" opencv-python tqdm + %pip install -q "openvino>=2023.1.0" opencv-python tqdm "matplotlib>=3.4" .. parsed-literal:: diff --git a/docs/notebooks/hello-segmentation-with-output.rst b/docs/notebooks/hello-segmentation-with-output.rst index 6f55f7666710cc..e490cf48533277 100644 --- a/docs/notebooks/hello-segmentation-with-output.rst +++ b/docs/notebooks/hello-segmentation-with-output.rst @@ -35,21 +35,13 @@ Guide =2023.1.0" opencv-python tqdm - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "openvino>=2023.1.0" opencv-python tqdm "matplotlib>=3.4" .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. Imports @@ -196,7 +188,7 @@ is provided. .. parsed-literal:: - + @@ -223,7 +215,7 @@ Do Inference .. parsed-literal:: - + diff --git a/docs/notebooks/hello-world-with-output.rst b/docs/notebooks/hello-world-with-output.rst index 23ea9c15b85df0..5bd1216db29701 100644 --- a/docs/notebooks/hello-world-with-output.rst +++ b/docs/notebooks/hello-world-with-output.rst @@ -37,23 +37,13 @@ Guide =2023.1.0" opencv-python tqdm - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" - - + # Install required packages + %pip install -q "openvino>=2023.1.0" opencv-python tqdm "matplotlib>=3.4" .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. Imports diff --git a/docs/notebooks/hugging-face-hub-with-output.rst b/docs/notebooks/hugging-face-hub-with-output.rst index b13205541f558f..e17d63a031b445 100644 --- a/docs/notebooks/hugging-face-hub-with-output.rst +++ b/docs/notebooks/hugging-face-hub-with-output.rst @@ -196,7 +196,7 @@ Note how we reuse our real ``encoded_input``, passing it to the .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4779: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4779: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( @@ -333,10 +333,10 @@ documentation `__. To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) - 2024-10-08 02:29:42.074725: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 02:29:42.108881: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-23 01:23:29.312809: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-23 01:23:29.347879: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 02:29:42.698091: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-23 01:23:29.943155: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT Initialize and Convert the Model Automatically using OVModel class @@ -372,13 +372,9 @@ inference run. .. parsed-literal:: - Framework not specified. Using pt to export the model. Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight'] - This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). - This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). - Using framework PyTorch: 2.2.2+cpu - Overriding 1 configuration item(s) - - use_cache -> False .. parsed-literal:: @@ -386,11 +382,6 @@ inference run. WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. -.. 
parsed-literal:: - - Compiling the model to AUTO ... - - Convert model using Optimum CLI interface ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -443,7 +434,7 @@ Full list of supported arguments available via ``--help`` .. parsed-literal:: - 2024-10-08 02:29:55.851395: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-23 01:23:44.450300: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT usage: optimum-cli export openvino [-h] -m MODEL [--task TASK] [--framework {pt,tf}] [--trust-remote-code] [--weight-format {fp32,fp16,int8,int4,mxfp4}] @@ -474,18 +465,18 @@ Full list of supported arguments available via ``--help`` --task TASK The task to export the model for. If not specified, the task will be auto-inferred based on the model. Available tasks depend on the model, but are among: - ['image-classification', 'depth-estimation', 'object- - detection', 'zero-shot-object-detection', 'text-to- - audio', 'token-classification', 'audio-frame- - classification', 'semantic-segmentation', 'automatic- - speech-recognition', 'image-segmentation', 'question- - answering', 'text-classification', 'multiple-choice', - 'inpainting', 'masked-im', 'text2text-generation', - 'image-to-text', 'text-generation', 'sentence- - similarity', 'mask-generation', 'text-to-image', - 'audio-xvector', 'zero-shot-image-classification', - 'fill-mask', 'image-to-image', 'feature-extraction', - 'audio-classification']. For decoder models, use `xxx- + ['audio-xvector', 'text-to-audio', 'fill-mask', + 'feature-extraction', 'text-generation', 'zero-shot- + image-classification', 'text-to-image', 'text2text- + generation', 'zero-shot-object-detection', 'automatic- + speech-recognition', 'text-classification', 'semantic- + segmentation', 'masked-im', 'image-to-text', + 'sentence-similarity', 'object-detection', + 'inpainting', 'audio-frame-classification', 'image-to- + image', 'token-classification', 'question-answering', + 'audio-classification', 'image-segmentation', 'image- + classification', 'mask-generation', 'multiple-choice', + 'depth-estimation']. For decoder models, use `xxx- with-past` to export the model using past key values in the decoder. --framework {pt,tf} The framework to use for the export. If not provided, @@ -592,16 +583,10 @@ compression: .. parsed-literal:: - 2024-10-08 02:30:01.388483: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - Framework not specified. Using pt to export the model. + 2024-10-23 01:23:49.987001: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight'] - This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). - This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). - Using framework PyTorch: 2.2.2+cpu - Overriding 1 configuration item(s) - - use_cache -> False - OpenVINO Tokenizers is not available. 
To deploy models in production with C++ code, please follow installation instructions: https://github.com/openvinotoolkit/openvino_tokenizers?tab=readme-ov-file#installation - Tokenizer won't be converted. @@ -612,12 +597,6 @@ be loaded using the same OVModelForXXX class. model = OVModelForSequenceClassification.from_pretrained("models/optimum_model/fp16", device=device.value) - -.. parsed-literal:: - - Compiling the model to AUTO ... - - There are some models in the Hugging Face Models Hub, that are already converted and ready to run! You can filter those models out by library name, just type OpenVINO, or follow `this diff --git a/docs/notebooks/image-bind-with-output.rst b/docs/notebooks/image-bind-with-output.rst index 12321638ed19f8..1e8ecd63c1de0e 100644 --- a/docs/notebooks/image-bind-with-output.rst +++ b/docs/notebooks/image-bind-with-output.rst @@ -131,7 +131,7 @@ Prerequisites import platform - %pip install -q "torch>=2.0.1" "torchvision>=0.15.2,<0.17.0" "torchaudio>=2.0.2" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "torch>=2.0.1" "torchvision>=0.15.2,<0.17.0" "torchaudio>=2.0.2" "matplotlib>=3.4" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q datasets regex librosa soundfile pytorchvideo ftfy "timm>=0.6.7" einops fvcore "openvino>=2024.0.0" "nncf>=2.9.0" numpy scipy --extra-index-url https://download.pytorch.org/whl/cpu diff --git a/docs/notebooks/image-classification-quantization-with-output.rst b/docs/notebooks/image-classification-quantization-with-output.rst index f45c3ca5ec3b32..b5b33fdfc852e7 100644 --- a/docs/notebooks/image-classification-quantization-with-output.rst +++ b/docs/notebooks/image-classification-quantization-with-output.rst @@ -54,21 +54,13 @@ Guide =2023.1.0" "nncf>=2.6.0" torch torchvision tqdm --extra-index-url https://download.pytorch.org/whl/cpu - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "openvino>=2023.1.0" "nncf>=2.6.0" torch torchvision tqdm "matplotlib>=3.4" --extra-index-url https://download.pytorch.org/whl/cpu .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. .. code:: ipython3 @@ -112,7 +104,7 @@ Model preparation stage has the following steps: remote: Counting objects: 100% (281/281), done. remote: Compressing objects: 100% (96/96), done. remote: Total 282 (delta 135), reused 269 (delta 128), pack-reused 1 (from 1) - Receiving objects: 100% (282/282), 9.22 MiB | 18.95 MiB/s, done. + Receiving objects: 100% (282/282), 9.22 MiB | 24.58 MiB/s, done. Resolving deltas: 100% (135/135), done. @@ -184,7 +176,7 @@ Preprocessing for model obtained from training .. parsed-literal:: - 100%|██████████| 170498071/170498071 [00:06<00:00, 24572685.83it/s] + 100%|██████████| 170498071/170498071 [00:07<00:00, 23490143.19it/s] .. parsed-literal:: @@ -256,10 +248,10 @@ about supported parameters can be found on this .. parsed-literal:: - 2024-10-08 02:30:41.915322: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 02:30:41.946467: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. 
+ 2024-10-23 01:24:29.479834: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-23 01:24:29.511386: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 02:30:42.497931: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-23 01:24:30.071901: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -427,7 +419,7 @@ Tool =2023.3.0" opencv-python transformers "diffusers>=0.24.0" accelerate gdown "scikit-image>=0.19.2" "gradio>=4.19" "nncf>=2.9.0" "datasets>=2.14.6" "peft>=0.6.2" + %pip install -q "openvino>=2023.3.0" opencv-python transformers "diffusers>=0.24.0" "matplotlib>=3.4" accelerate gdown "scikit-image>=0.19.2" "gradio>=4.19" "nncf>=2.9.0" "datasets>=2.14.6" "peft>=0.6.2" Convert and prepare Face IdentityNet ------------------------------------ diff --git a/docs/notebooks/instruct-pix2pix-image-editing-with-output.rst b/docs/notebooks/instruct-pix2pix-image-editing-with-output.rst index 6c27e0431eea09..72a9c88e8ea0c3 100644 --- a/docs/notebooks/instruct-pix2pix-image-editing-with-output.rst +++ b/docs/notebooks/instruct-pix2pix-image-editing-with-output.rst @@ -71,15 +71,8 @@ Install necessary packages .. code:: ipython3 - import platform - - %pip install -q "transformers>=4.25.1" torch accelerate "gradio>4.19" "datasets>=2.14.6" diffusers pillow opencv-python --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "transformers>=4.25.1" torch accelerate "gradio>4.19" "datasets>=2.14.6" "matplotlib>=3.4" diffusers pillow opencv-python --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q "openvino>=2023.1.0" - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" Create Pytorch Models pipeline ------------------------------ diff --git a/docs/notebooks/internvl2-with-output.rst b/docs/notebooks/internvl2-with-output.rst index 038e397209cb6c..602bfe84590494 100644 --- a/docs/notebooks/internvl2-with-output.rst +++ b/docs/notebooks/internvl2-with-output.rst @@ -275,6 +275,16 @@ documentation self.max_seq_len_cached: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/qwen2/modeling_qwen2.py:324: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/qwen2/modeling_qwen2.py:324: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/qwen2/modeling_qwen2.py:339: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/qwen2/modeling_qwen2.py:339: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim): @@ -521,7 +531,7 @@ Let’s check model capabilities in answering questions about image: .. parsed-literal:: - The image displays a young red panda cub. This cute creature has a striking mix of dark reds and whites on its fur, with a striking white underbelly and back. The cub is sitting and peering forward with a curious expression. It appears to be peering through a wooden partition, and a piece of bamboo is visible in the bottom-left corner of the image. The background includes green foliage, suggesting a natural habitat for the cub. This cub is cute and looks like it's enjoying + The image depicts a red panda, a large feline with a distinctive, reddish-brown coat and white face and chest. It is peeking over what appears to be a wooden platform or platform made for panda viewing in captivity. The background is filled with greenery, indicating that the photo was likely taken in a conservatory or wildlife park where penguins or seabirds are displayed. Interactive demo ---------------- diff --git a/docs/notebooks/jina-clip-with-output.rst b/docs/notebooks/jina-clip-with-output.rst index 5b61ee9af2d3e4..466e3be5d03fd1 100644 --- a/docs/notebooks/jina-clip-with-output.rst +++ b/docs/notebooks/jina-clip-with-output.rst @@ -77,7 +77,7 @@ Prerequisites .. code:: ipython3 %pip install -q "openvino>=2024.2.0" "datasets>=2.20" "nncf>=2.11.0" - %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "gradio>=4.19" "pillow" "einops" "timm" "transformers[torch]>=4.39" "torch>=2.1" + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "gradio>=4.19" "pillow" "einops" "timm" "transformers[torch]>=4.39" "torch>=2.1" "matplotlib>=3.4" .. parsed-literal:: @@ -104,50 +104,14 @@ weights, using ``from_pretrained`` method. model = AutoModel.from_pretrained("jinaai/jina-clip-v1", trust_remote_code=True) - -.. parsed-literal:: - - configuration_clip.py: 0%| | 0.00/11.7k [00:00=2024.0.0" "nncf>=2.11.0" "datasets>=2.20.0" - %pip install -q "transformers>=4.35" Pillow "gradio>=4.19" opencv-python + %pip install -q "transformers>=4.35" Pillow "gradio>=4.19" opencv-python "matplotlib>=3.4" %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu torch torchvision .. 
parsed-literal:: - Requirement already satisfied: pip in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (24.2) + Requirement already satisfied: pip in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (24.2) Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. @@ -152,10 +152,10 @@ example `__ .. parsed-literal:: - 2024-10-08 02:37:47.151795: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 02:37:47.185561: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-23 01:31:37.373532: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-23 01:31:37.408388: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 02:37:47.732267: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-23 01:31:37.931462: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -374,11 +374,11 @@ Vision model accept ``pixel_values`` and returns ``image_embeds``. .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4713: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4713: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:465: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:465: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:505: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:505: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): @@ -406,7 +406,7 @@ Convert Image To Text Projection model .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:165: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:165: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) if a.grad is not None: @@ -541,13 +541,13 @@ generated text by ``AutoProcessor``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:804: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:804: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if max_pos > self.weights.size(0): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:1113: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:1113: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if input_shape[-1] > 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:920: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:920: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attention_mask.size() != (batch_size, 1, seq_length, src_len): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:1206: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:1206: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: @@ -1389,9 +1389,9 @@ pipelines, we use mean inference time on 7 samples. .. 
parsed-literal:: - FP32 pipeline: 2.691 seconds - Optimized pipeline: 1.193 seconds - Performance speed-up: 2.257 + FP32 pipeline: 2.696 seconds + Optimized pipeline: 1.137 seconds + Performance speed-up: 2.372 Interactive inference diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.jpg b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.jpg index 16b3efe503aea0..9673054cc1b8e0 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.jpg +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:af0a33aba1728df2580f26f7ecf01774c2bc8084835df321d825197a945d775f -size 118088 +oid sha256:c45f05bcfa118a3d7e9024cf83963a4bb5504cb97f669c0c8261c469bab6dc75 +size 121985 diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.png b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.png index c47f776f0af026..a5be9e2155f6cc 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.png +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ebd6f6c9ee8fd3adb0dfbaebe7b41e7bdab039063a56ce21f0c6e01429d5ce6b -size 1151007 +oid sha256:2fc77a26e4c6d30ca9d21fe47ddc25624662d566f6c98ac1c160637fbcb2363b +size 1151151 diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_48_1.png b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_48_1.png index 90a6e30cff3f2e..0da4a480d88cc3 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_48_1.png +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_48_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8a3b5a0b0c94dec94a9aeb46ebb0ca6af8e63897684e446a299dc31b84851ce2 -size 1148928 +oid sha256:9cfd18faba3aef57349c70b157fabd4dc2827a8bc6074252e717a67a9f9d488a +size 1148752 diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.jpg b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.jpg index 073bf8e86591fb..e992bdf66a9da7 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.jpg +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0f41efc8140938f17d9ea5b31d699440fdcc3f3d407eba04abc4d694769f2529 -size 122071 +oid 
sha256:f70d76f01ee01d54713ed161de8d09713816b4f3fd7df99a5ff96c4c5e0b1bf0 +size 124539 diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.png b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.png index 5069e51978df61..ec9e6a6277d4f0 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.png +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fa756de5e754a64022b09b445f6851d8ce7373e09d5a776497bbf69513085cc8 -size 1150840 +oid sha256:f4dc2d892ef69df259eff4b46527d5e4382457b9b381a1e3b35b22d1d4312da4 +size 1150974 diff --git a/docs/notebooks/language-quantize-bert-with-output.rst b/docs/notebooks/language-quantize-bert-with-output.rst index f698389c6f8304..64f49c2b314811 100644 --- a/docs/notebooks/language-quantize-bert-with-output.rst +++ b/docs/notebooks/language-quantize-bert-with-output.rst @@ -101,10 +101,10 @@ Imports .. parsed-literal:: - 2024-10-08 02:44:29.489974: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 02:44:29.524217: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-23 01:38:12.900514: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-23 01:38:12.934654: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 02:44:30.075180: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-23 01:38:13.484434: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -211,7 +211,7 @@ PyTorch model formats are supported: .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4713: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4713: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( @@ -247,7 +247,7 @@ tokenizer from HuggingFace. .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884 warnings.warn( @@ -505,9 +505,9 @@ Frames Per Second (FPS) for images. .. parsed-literal:: - PyTorch model on CPU: 0.068 seconds per sentence, SPS: 14.78 - IR FP32 model in OpenVINO Runtime/AUTO: 0.020 seconds per sentence, SPS: 49.30 - OpenVINO IR INT8 model in OpenVINO Runtime/AUTO: 0.009 seconds per sentence, SPS: 107.63 + PyTorch model on CPU: 0.068 seconds per sentence, SPS: 14.67 + IR FP32 model in OpenVINO Runtime/AUTO: 0.020 seconds per sentence, SPS: 49.46 + OpenVINO IR INT8 model in OpenVINO Runtime/AUTO: 0.009 seconds per sentence, SPS: 108.19 Finally, measure the inference performance of OpenVINO ``FP32`` and @@ -548,27 +548,27 @@ in OpenVINO. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.LATENCY. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 18.82 ms + [ INFO ] Read model took 18.86 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,?] - [ INFO ] attention_mask , 63 (node: attention_mask) : i64 / [...] / [1,?] + [ INFO ] 63 , attention_mask (node: attention_mask) : i64 / [...] / [1,?] [ INFO ] token_type_ids (node: token_type_ids) : i64 / [...] / [1,?] [ INFO ] Model outputs: [ INFO ] logits (node: __module.classifier/aten::linear/Add) : f32 / [...] / [1,2] [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'input_ids': [1,128], '63': [1,128], 'token_type_ids': [1,128] - [ INFO ] Reshape model took 5.48 ms + [ INFO ] Reshape model took 5.46 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,128] - [ INFO ] attention_mask , 63 (node: attention_mask) : i64 / [...] / [1,128] + [ INFO ] 63 , attention_mask (node: attention_mask) : i64 / [...] / [1,128] [ INFO ] token_type_ids (node: token_type_ids) : i64 / [...] / [1,128] [ INFO ] Model outputs: [ INFO ] logits (node: __module.classifier/aten::linear/Add) : f32 / [...] / [1,2] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 351.96 ms + [ INFO ] Compile model took 373.39 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -609,17 +609,17 @@ in OpenVINO. 
[ INFO ] Fill input 'token_type_ids' with random values [Step 10/11] Measuring performance (Start inference synchronously, limits: 120000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 20.92 ms + [ INFO ] First inference took 24.05 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 6334 iterations - [ INFO ] Duration: 120017.42 ms + [ INFO ] Count: 5730 iterations + [ INFO ] Duration: 120000.60 ms [ INFO ] Latency: - [ INFO ] Median: 18.69 ms - [ INFO ] Average: 18.85 ms - [ INFO ] Min: 17.15 ms - [ INFO ] Max: 26.62 ms - [ INFO ] Throughput: 52.78 FPS + [ INFO ] Median: 20.64 ms + [ INFO ] Average: 20.84 ms + [ INFO ] Min: 19.84 ms + [ INFO ] Max: 31.70 ms + [ INFO ] Throughput: 47.75 FPS .. code:: ipython3 @@ -646,7 +646,7 @@ in OpenVINO. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.LATENCY. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 25.69 ms + [ INFO ] Read model took 25.60 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,?] @@ -657,7 +657,7 @@ in OpenVINO. [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'input_ids': [1,128], '63': [1,128], 'token_type_ids': [1,128] - [ INFO ] Reshape model took 7.39 ms + [ INFO ] Reshape model took 7.46 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,128] @@ -666,7 +666,7 @@ in OpenVINO. [ INFO ] Model outputs: [ INFO ] logits (node: __module.classifier/aten::linear/Add) : f32 / [...] / [1,2] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 1128.78 ms + [ INFO ] Compile model took 1067.96 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -707,15 +707,15 @@ in OpenVINO. [ INFO ] Fill input 'token_type_ids' with random values [Step 10/11] Measuring performance (Start inference synchronously, limits: 120000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 15.68 ms + [ INFO ] First inference took 17.45 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 12868 iterations - [ INFO ] Duration: 120000.02 ms + [ INFO ] Count: 13316 iterations + [ INFO ] Duration: 120006.19 ms [ INFO ] Latency: - [ INFO ] Median: 9.01 ms - [ INFO ] Average: 9.23 ms - [ INFO ] Min: 8.43 ms - [ INFO ] Max: 12.91 ms - [ INFO ] Throughput: 107.23 FPS + [ INFO ] Median: 8.92 ms + [ INFO ] Average: 8.92 ms + [ INFO ] Min: 7.66 ms + [ INFO ] Max: 14.16 ms + [ INFO ] Throughput: 110.96 FPS diff --git a/docs/notebooks/latent-consistency-models-optimum-demo-with-output.rst b/docs/notebooks/latent-consistency-models-optimum-demo-with-output.rst index 348a243480aec1..3707d3f30d5f09 100644 --- a/docs/notebooks/latent-consistency-models-optimum-demo-with-output.rst +++ b/docs/notebooks/latent-consistency-models-optimum-demo-with-output.rst @@ -134,10 +134,10 @@ https://huggingface.co/docs/diffusers/en/api/pipelines/latent_consistency_models .. parsed-literal:: - 2024-10-08 02:50:26.200628: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. 
You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 02:50:26.234856: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-23 01:44:02.155955: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-23 01:44:02.191160: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 02:50:26.890470: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-23 01:44:02.863862: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -231,7 +231,6 @@ and there is no need to do it manually .. parsed-literal:: - Framework not specified. Using pt to export the model. Keyword arguments {'subfolder': '', 'trust_remote_code': False} are not expected by LatentConsistencyModelPipeline and will be ignored. @@ -241,11 +240,6 @@ and there is no need to do it manually Loading pipeline components...: 0%| | 0/7 [00:00 by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 . @@ -265,43 +257,40 @@ and there is no need to do it manually OVLatentConsistencyModelPipeline { "_class_name": "OVLatentConsistencyModelPipeline", - "_diffusers_version": "0.30.3", + "_diffusers_version": "0.31.0", + "_name_or_path": "/tmp/tmpz3tx8cvr", "feature_extractor": [ "transformers", "CLIPImageProcessor" ], + "image_encoder": [ + null, + null + ], "requires_safety_checker": true, "safety_checker": [ - "stable_diffusion", - "StableDiffusionSafetyChecker" + null, + null ], "scheduler": [ "diffusers", "LCMScheduler" ], "text_encoder": [ - "optimum", + "optimum.intel.openvino.modeling_diffusion", "OVModelTextEncoder" ], - "text_encoder_2": [ - null, - null - ], "tokenizer": [ "transformers", "CLIPTokenizer" ], "unet": [ - "optimum", + "optimum.intel.openvino.modeling_diffusion", "OVModelUnet" ], - "vae_decoder": [ - "optimum", - "OVModelVaeDecoder" - ], - "vae_encoder": [ - "optimum", - "OVModelVaeEncoder" + "vae": [ + "optimum.intel.openvino.modeling_diffusion", + "OVModelVae" ] } @@ -312,15 +301,6 @@ and there is no need to do it manually ov_pipeline.to(device.value) ov_pipeline.compile() - -.. parsed-literal:: - - Compiling the vae_decoder to AUTO ... - Compiling the unet to AUTO ... - Compiling the vae_encoder to AUTO ... - Compiling the text_encoder to AUTO ... - - .. 
code:: ipython3 prompt = "A cute squirrel in the forest, portrait, 8k" diff --git a/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_15_1.jpg b/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_15_1.jpg index c3ee252ebc2731..a19eb369148e10 100644 --- a/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_15_1.jpg +++ b/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_15_1.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5596d96021ad02d2a954c32cfe4679598a3fc331bbb92df84e437b2594a5d525 -size 30142 +oid sha256:8ccbcf6c99b8d75b1c683375852111ce17983fbea50bd0b0bb294d06bbb317fb +size 38768 diff --git a/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_15_1.png b/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_15_1.png index 484f8da2cd5c03..d27692124393df 100644 --- a/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_15_1.png +++ b/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_15_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1d2a51b952179d6520da3d61f79e1ee7d57d7c22504cf712579110c07b68e63f -size 421582 +oid sha256:b936a832e23c30affb1988828f17a16e988b42413ff7dfe4381fb723e068ed97 +size 456597 diff --git a/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_8_1.jpg b/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_8_1.jpg index bce1a840efae34..e320b91964889a 100644 --- a/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_8_1.jpg +++ b/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_8_1.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e624f572f30ee308bed067571eeb9cb035eb095eabebc0914ac8ae67605896c3 -size 33139 +oid sha256:0ab792ded8330c36072ec13fcfb0e0896ce3014413145167fd7a7c5b570919a4 +size 37604 diff --git a/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_8_1.png b/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_8_1.png index 9803cd18053390..e7a5d16aa88eb2 100644 --- a/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_8_1.png +++ b/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_8_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0bd853d844aabefb69a678715b479dd74c84bd003eeb0362eb568cc592b8fc69 -size 428147 +oid sha256:f440a9e7cb3d6a9f6609165516be7e5025e26752004ca56baf570f49840af03c +size 470802 diff --git a/docs/notebooks/lcm-lora-controlnet-with-output.rst b/docs/notebooks/lcm-lora-controlnet-with-output.rst index 
c34511bc579b08..c6383078a89b71 100644 --- a/docs/notebooks/lcm-lora-controlnet-with-output.rst +++ b/docs/notebooks/lcm-lora-controlnet-with-output.rst @@ -236,7 +236,7 @@ Install required packages .. code:: ipython3 %pip install -q "torch" transformers "diffusers>=0.24.0" "controlnet-aux>=0.0.6" "peft>=0.6.2" accelerate --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "openvino>=2023.2.0" pillow "gradio>=4.19" "datasets>=2.14.6" "nncf>=2.7.0" + %pip install -q "openvino>=2023.2.0" pillow "gradio>=4.19" "datasets>=2.14.6" "nncf>=2.7.0" "matplotlib>=3.4" Prepare PyTorch models diff --git a/docs/notebooks/llava-multimodal-chatbot-genai-with-output.rst b/docs/notebooks/llava-multimodal-chatbot-genai-with-output.rst new file mode 100644 index 00000000000000..435e7eaf62c53b --- /dev/null +++ b/docs/notebooks/llava-multimodal-chatbot-genai-with-output.rst @@ -0,0 +1,509 @@ +Visual-language assistant with LLaVA and OpenVINO Generative API +================================================================ + +`LLaVA `__ (Large Language and Vision +Assistant) is a large multimodal model that aims to develop a +general-purpose visual assistant that can follow both language and image +instructions to complete various real-world tasks. The idea is to +combine the power of large language models (LLMs) with vision encoders +like CLIP to create an end-to-end trained neural assistant that +understands and acts upon multimodal instructions. + +In the field of artificial intelligence, the goal is to create a +versatile assistant capable of understanding and executing tasks based +on both visual and language inputs. Current approaches often rely on +large vision models that solve tasks independently, with language only +used to describe image content. While effective, these models have fixed +interfaces with limited interactivity and adaptability to user +instructions. On the other hand, large language models (LLMs) have shown +promise as a universal interface for general-purpose assistants. By +explicitly representing various task instructions in language, these +models can be guided to switch and solve different tasks. To extend this +capability to the multimodal domain, the `LLaVA +paper `__ introduces visual +instruction tuning, a novel approach to building a general-purpose +visual assistant. + +In this tutorial we consider how to use the LLaVA model to build a multimodal +chatbot using `OpenVINO +GenAI `__. For +demonstration purposes we will use the +`LLaVA-1.5-7B `__ model for conversion; +similar steps are required to run other models from the `LLaVA Model +Zoo `__.
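To make the fusion idea mentioned above more concrete, here is a minimal, illustrative sketch (not part of the notebook; the module name, dimensions, and tensor shapes are toy assumptions): features produced by a CLIP-like vision encoder are mapped by a projection layer into the LLM embedding space and concatenated with the text token embeddings before being passed to the language model.

.. code:: python

   import torch
   import torch.nn as nn


   class ToyVisionLanguageFusion(nn.Module):
       """Illustrative LLaVA-style fusion: project image features, then prepend them to text embeddings."""

       def __init__(self, vision_dim: int = 1024, llm_dim: int = 4096):
           super().__init__()
           # A single linear projection, mirroring the "simple projection matrix" idea.
           self.projection = nn.Linear(vision_dim, llm_dim)

       def forward(self, image_features: torch.Tensor, text_embeddings: torch.Tensor) -> torch.Tensor:
           # image_features: [batch, num_patches, vision_dim]
           # text_embeddings: [batch, num_text_tokens, llm_dim]
           image_tokens = self.projection(image_features)
           # The LLM then attends over the combined sequence of image and text tokens.
           return torch.cat([image_tokens, text_embeddings], dim=1)


   # Toy usage with random tensors; real dimensions depend on the chosen vision encoder and LLM.
   fusion = ToyVisionLanguageFusion()
   combined = fusion(torch.randn(1, 576, 1024), torch.randn(1, 16, 4096))
   print(combined.shape)  # torch.Size([1, 592, 4096])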
+ +- Install prerequisites +- Convert model to OpenVINO Intermediate Representation format using + Optimum Intel +- Compress model weights to 4 and 8 bits using NNCF +- Prepare OpenVINO GenAI inference pipeline +- Run OpenVINO model + + +**Table of contents:** + + +- `About model <#about-model>`__ +- `Prerequisites <#prerequisites>`__ +- `Convert and Optimize Model <#convert-and-optimize-model>`__ + + - `Convert model to OpenVINO IR format using Optimum + CLI <#convert-model-to-openvino-ir-format-using-optimum-cli>`__ + - `Compress Model weights to 4 and 8 bits using + NNCF <#compress-model-weights-to-4-and-8-bits-using-nncf>`__ + +- `Prepare OpenVINO GenAI inference + pipeline <#prepare-openvino-genai-inference-pipeline>`__ + + - `Select inference device <#select-inference-device>`__ + - `Select model variant <#select-model-variant>`__ + - `Load OpenVINO model <#load-openvino-model>`__ + +- `Run model inference <#run-model-inference>`__ + + - `Prepare input data <#prepare-input-data>`__ + - `Test model inference <#test-model-inference>`__ + +- `Interactive demo <#interactive-demo>`__ + +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a self-contained example that relies solely on its own code. + +We recommend running the notebook in a virtual environment. You only +need a Jupyter server to start. For details, please refer to +`Installation +Guide `__. + +About model +----------- + + + +LLaVA connects a pre-trained `CLIP +ViT-L/14 `__ visual encoder and a large +language model like Vicuna, LLaMa v2 or MPT, using a simple projection +matrix. + +.. figure:: https://llava-vl.github.io/images/llava_arch.png + :alt: vlp_matrix.png + + vlp_matrix.png + +The model training procedure consists of 2 stages: + +- Stage 1: Pre-training for Feature Alignment. Only the projection + matrix is updated, based on a subset of CC3M. +- Stage 2: Fine-tuning End-to-End. Both the projection matrix and LLM + are updated for two different use scenarios: + + - Visual Chat: LLaVA is fine-tuned on our generated multimodal + instruction-following data for daily user-oriented applications. + - Science QA: LLaVA is fine-tuned on this multimodal reasoning + dataset for the science domain. + +More details about the model can be found in the original `project +web-page `__, +`paper `__ and +`repo `__. + +Prerequisites +------------- + + + +Install required dependencies + +.. code:: ipython3 + + %pip install -q "torch>=2.1.0" "torchvision" "torchaudio" --index-url https://download.pytorch.org/whl/cpu + %pip install -q "git+https://github.com/eaidova/optimum-intel.git@ea/minicpmv" + %pip install -q "nncf>=2.13.0" "sentencepiece" "tokenizers>=0.12.1" "transformers>=4.45.0" "gradio>=4.36" + %pip install -q -U --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly openvino_tokenizers openvino openvino-genai + +Convert and Optimize Model +-------------------------- + + + +Our model conversion and optimization consist of the following steps: 1. +Download the original PyTorch model. 2. Convert the model to OpenVINO format. 3. +Compress model weights using NNCF. + +Let’s consider each step more deeply. + +Convert model to OpenVINO IR format using Optimum CLI +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +OpenVINO supports PyTorch models via conversion to OpenVINO Intermediate +Representation format. For convenience, we will use OpenVINO integration +with HuggingFace Optimum.
`Optimum +Intel `__ is the +interface between the Transformers and Diffusers libraries and the +different tools and libraries provided by Intel to accelerate end-to-end +pipelines on Intel architectures. + +Among other use cases, Optimum Intel provides a simple interface to +optimize your Transformers and Diffusers models, convert them to the +OpenVINO Intermediate Representation (IR) format and run inference using +OpenVINO Runtime. ``optimum-cli`` provides a command line interface for +model conversion and optimization. + +General command format: + +.. code:: bash + + optimum-cli export openvino --model --task + +where task is the task to export the model for; if not specified, the task +will be auto-inferred based on the model. You can find a mapping between +tasks and model classes in Optimum TaskManager +`documentation `__. +Additionally, you can specify weights compression using the +``--weight-format`` argument with one of the following options: ``fp32``, +``fp16``, ``int8`` and ``int4``. For int8 and int4 +`nncf `__ will be used for +weight compression. More details about model export are provided in `Optimum +Intel +documentation `__. + +.. code:: ipython3 + + from pathlib import Path + + model_id = "llava-hf/llava-1.5-7b-hf" + model_path = Path(model_id.split("/")[-1]) / "FP16" + + if not model_path.exists(): + !optimum-cli export openvino --model {model_id} --weight-format fp16 {model_path} + +Compress Model weights to 4 and 8 bits using NNCF +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +To reduce memory consumption, weights compression optimization can be +applied using `NNCF `__. Weight +compression aims to reduce the memory footprint of a model. It can also +lead to significant performance improvement for large memory-bound +models, such as Large Language Models (LLMs). LLMs and other models, +which require extensive memory to store the weights during inference, +can benefit from weight compression in the following ways: + +- enabling the inference of exceptionally large models that cannot be + accommodated in the memory of the device; + +- improving the inference performance of the models by reducing the + latency of the memory access when computing the operations with + weights, for example, Linear layers. + +`Neural Network Compression Framework +(NNCF) `__ provides 4-bit / +8-bit mixed weight quantization as a compression method primarily +designed to optimize LLMs. The main difference between weights +compression and full model quantization (post-training quantization) is +that activations remain floating-point in the case of weights +compression, which leads to better accuracy. Weight compression for +LLMs provides a solid inference performance improvement which is on par +with the performance of the full model quantization. In addition, weight +compression is data-free and does not require a calibration dataset, +making it easy to use. + +The ``nncf.compress_weights`` function can be used for performing weights +compression. The function accepts an OpenVINO model and other +compression parameters. Compared to INT8 compression, INT4 compression +improves performance even more, but introduces a minor drop in +prediction quality. + +More details about weights compression can be found in `OpenVINO +documentation `__. + +.. code:: ipython3 + + import ipywidgets as widgets + + compression_mode = widgets.Dropdown( + options=["INT4", "INT8"], + value="INT4", + description="Compression mode:", + disabled=False, + ) + + compression_mode + + + + +..
+.. code:: ipython3
+
+    import shutil
+    import nncf
+    import openvino as ov
+    import gc
+
+    core = ov.Core()
+
+
+    def compress_model_weights(precision):
+        int4_compression_config = {
+            "mode": nncf.CompressWeightsMode.INT4_ASYM,
+            "group_size": 128,
+            "ratio": 1,
+        }
+        int8_compression_config = {"mode": nncf.CompressWeightsMode.INT8_ASYM}
+
+        compressed_model_path = model_path.parent / precision
+
+        if not compressed_model_path.exists():
+            ov_model = core.read_model(model_path / "openvino_language_model.xml")
+            compression_config = int4_compression_config if precision == "INT4" else int8_compression_config
+            compressed_ov_model = nncf.compress_weights(ov_model, **compression_config)
+            ov.save_model(compressed_ov_model, compressed_model_path / "openvino_language_model.xml")
+            del compressed_ov_model
+            del ov_model
+            gc.collect()
+            for file_name in model_path.glob("*"):
+                if file_name.name in ["openvino_language_model.xml", "openvino_language_model.bin"]:
+                    continue
+                shutil.copy(file_name, compressed_model_path)
+
+
+    compress_model_weights(compression_mode.value)
+
+
+.. parsed-literal::
+
+    INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino
+
+
+Prepare OpenVINO GenAI inference pipeline
+-----------------------------------------
+
+
+
+`OpenVINO™ GenAI `__
+is a library of the most popular Generative AI model pipelines,
+optimized execution methods, and samples that run on top of highly
+performant `OpenVINO
+Runtime `__.
+
+This library is friendly to PC and laptop execution, and optimized for
+resource consumption. It requires no external dependencies to run
+generative models as it already includes all the core functionality
+(e.g. tokenization via openvino-tokenizers). OpenVINO™ GenAI is a flavor
+of OpenVINO™, aiming to simplify running inference of generative AI
+models. It hides the complexity of the generation process and minimizes
+the amount of code required.
+
+Inference for visual language models can be implemented using the
+OpenVINO GenAI ``VLMPipeline`` class, similarly to the ``LLMPipeline``
+that we discussed in this
+`notebook `__.
+It supports chat mode, preserving the conversational history inside the
+pipeline, which allows us to effectively implement a chatbot that can
+discuss the content of the provided images.
+
+.. code:: ipython3
+
+    from openvino_genai import VLMPipeline, GenerationConfig
+
+Select inference device
+~~~~~~~~~~~~~~~~~~~~~~~
+
+
+
+Select the device from the dropdown list for running inference using
+OpenVINO.
+
+.. code:: ipython3
+
+    import requests
+
+    r = requests.get(
+        url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py",
+    )
+    open("notebook_utils.py", "w").write(r.text)
+
+    from notebook_utils import device_widget
+
+    device = device_widget(exclude=["NPU"])
+
+    device
+
+
+
+
+.. parsed-literal::
+
+    Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO')
+
+
+
+Select model variant
+~~~~~~~~~~~~~~~~~~~~
+
+
+
+.. code:: ipython3
+
+    model_base_path = model_path.parent
+    available_models = []
+
+    for precision in ["INT4", "INT8", "FP16"]:
+        if (model_base_path / precision).exists():
+            available_models.append(precision)
+
+    model_variant = widgets.Dropdown(
+        options=available_models,
+        value=available_models[0],
+        description="Compression mode:",
+        disabled=False,
+    )
+
+    model_variant
+
+
+
+
+.. parsed-literal::
+
+    Dropdown(description='Compression mode:', options=('INT4', 'FP16'), value='INT4')
+
+
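+Before loading the model, we can check how much the weight compression
+reduced the footprint on disk. This is a small illustrative check that
+is not part of the original pipeline; it simply compares the size of the
+language model weights file for every variant that was produced.
+
+.. code:: ipython3
+
+    # Compare the on-disk size of the language model weights for each variant.
+    # The language model file dominates the total model footprint.
+    for precision in ["FP16", "INT8", "INT4"]:
+        weights_file = model_base_path / precision / "openvino_language_model.bin"
+        if weights_file.exists():
+            print(f"{precision}: {weights_file.stat().st_size / 1024 ** 2:.1f} MB")
+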
+Load OpenVINO model
+~~~~~~~~~~~~~~~~~~~
+
+
+
+For pipeline initialization, we should provide the path to the model
+directory and the inference device.
+
+.. code:: ipython3
+
+    ov_model = VLMPipeline(str(model_base_path / model_variant.value), device=device.value)
+
+Run model inference
+-------------------
+
+
+
+Now that we have the model and have defined the generation pipeline, we
+can run model inference.
+
+Prepare input data
+~~~~~~~~~~~~~~~~~~
+
+
+
+``VLMPipeline`` uses a tokenizer and an image processor internally, so
+to prepare input data we just need to convert the image to an OpenVINO
+tensor and provide the question as a string. Additionally, we can
+provide options for controlling the generation process (e.g. the maximum
+number of generated tokens, or using multinomial sampling for decoding
+instead of the greedy search approach) using ``GenerationConfig``.
+
+The generation process for a long response may be time-consuming. To
+access partial results as soon as they are generated, without waiting
+for the whole process to finish, the Streaming API can be used. Token
+streaming is the mode in which the generative system returns the tokens
+one by one as the model generates them. This enables showing progressive
+generations to the user rather than waiting for the whole generation.
+Streaming is an essential aspect of the end-user experience as it
+reduces latency, one of the most critical aspects of a smooth
+experience.
+
+.. code:: ipython3
+
+    import requests
+    from PIL import Image
+    from io import BytesIO
+    import numpy as np
+
+    config = GenerationConfig()
+    config.max_new_tokens = 100
+
+
+    def load_image(image_file):
+        if image_file.startswith("http") or image_file.startswith("https"):
+            response = requests.get(image_file)
+            image = Image.open(BytesIO(response.content)).convert("RGB")
+        else:
+            image = Image.open(image_file).convert("RGB")
+        image_data = np.array(image.getdata()).reshape(1, 3, image.size[1], image.size[0]).astype(np.byte)
+        return image, ov.Tensor(image_data)
+
+
+    def streamer(subword: str) -> bool:
+        """
+
+        Args:
+            subword: sub-word of the generated text.
+
+        Returns: Flag indicating whether generation should be stopped.
+
+        """
+        print(subword, end="", flush=True)
+        # Returning False tells the pipeline to continue generation.
+        return False
+
+
+    image_file = "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11"
+
+    image, image_tensor = load_image(image_file)
+    text_message = "What is unusual on this image?"
+
+    prompt = text_message
+
+Test model inference
+~~~~~~~~~~~~~~~~~~~~
+
+
+
+.. code:: ipython3
+
+    display(image)
+    print(f"Question:\n{text_message}")
+    print("Answer:")
+    output = ov_model.generate(prompt, image=image_tensor, generation_config=config, streamer=streamer)
+
+
+
+.. image:: llava-multimodal-chatbot-genai-with-output_files/llava-multimodal-chatbot-genai-with-output_21_0.png
+
+
+.. parsed-literal::
+
+    Question:
+    What is unusual on this image?
+    Answer:
+
+    The unusual aspect of this image is that a cat is lying inside a cardboard box. Cats are known for their curiosity and love for small, enclosed spaces. However, it is not a common sight to see a cat comfortably resting inside a cardboard box.
+
+
+Interactive demo
+----------------
+
+
+
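+The demo below builds a chat interface on top of the chat mode of the
+pipeline. As a rough illustration of what happens under the hood, the
+sketch below keeps a multi-turn conversation without any UI, assuming
+that the installed ``openvino-genai`` build exposes ``start_chat()`` and
+``finish_chat()`` on ``VLMPipeline`` (as it does for ``LLMPipeline``);
+the follow-up question is purely illustrative.
+
+.. code:: ipython3
+
+    # Minimal multi-turn sketch (not part of the original notebook): chat mode
+    # keeps the conversation history inside the pipeline between generate() calls.
+    ov_model.start_chat()
+    print(ov_model.generate("What is unusual on this image?", image=image_tensor, generation_config=config))
+    # The follow-up turn reuses the history stored by the pipeline.
+    print(ov_model.generate("How would you describe the cat's mood?", generation_config=config))
+    ov_model.finish_chat()
+
+..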
code:: ipython3 + + if not Path("gradio_helper.py").exists(): + r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/llava-multimodal-chatbot/gradio_helper.py") + open("gradio_helper.py", "w").write(r.text) + + from gradio_helper import make_demo_llava + + demo = make_demo_llava(ov_model) + + try: + demo.launch(debug=False) + except Exception: + demo.launch(share=True, debug=False) + # if you are launching remotely, specify server_name and server_port + # demo.launch(server_name='your server name', server_port='server port in int') + # Read more in the docs: https://gradio.app/docs/ diff --git a/docs/notebooks/llava-multimodal-chatbot-genai-with-output_files/llava-multimodal-chatbot-genai-with-output_21_0.jpg b/docs/notebooks/llava-multimodal-chatbot-genai-with-output_files/llava-multimodal-chatbot-genai-with-output_21_0.jpg new file mode 100644 index 00000000000000..c6aeec77cd3cb2 --- /dev/null +++ b/docs/notebooks/llava-multimodal-chatbot-genai-with-output_files/llava-multimodal-chatbot-genai-with-output_21_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fc0d22d75f23474fb4f8aec8c0bf0fdf5d9377f3379e82a3887003e6da47e7e +size 60425 diff --git a/docs/notebooks/llava-multimodal-chatbot-genai-with-output_files/llava-multimodal-chatbot-genai-with-output_21_0.png b/docs/notebooks/llava-multimodal-chatbot-genai-with-output_files/llava-multimodal-chatbot-genai-with-output_21_0.png new file mode 100644 index 00000000000000..c6673a757ab5dc --- /dev/null +++ b/docs/notebooks/llava-multimodal-chatbot-genai-with-output_files/llava-multimodal-chatbot-genai-with-output_21_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c715d8adee4bf7519690de20b57ef2edaa2f914c86a64d107f99a919dcdad218 +size 854224 diff --git a/docs/notebooks/llava-multimodal-chatbot-optimum-with-output.rst b/docs/notebooks/llava-multimodal-chatbot-optimum-with-output.rst new file mode 100644 index 00000000000000..3c88a8e94388d4 --- /dev/null +++ b/docs/notebooks/llava-multimodal-chatbot-optimum-with-output.rst @@ -0,0 +1,508 @@ +Visual-language assistant with LLaVA and Optimum Intel OpenVINO integration +=========================================================================== + +`LLaVA `__ (Large Language and Vision +Assistant) is large multimodal model that aims to develop a +general-purpose visual assistant that can follow both language and image +instructions to complete various real-world tasks. The idea is to +combine the power of large language models (LLMs) with vision encoders +like CLIP to create an end-to-end trained neural assistant that +understands and acts upon multimodal instructions. + +In the field of artificial intelligence, the goal is to create a +versatile assistant capable of understanding and executing tasks based +on both visual and language inputs. Current approaches often rely on +large vision models that solve tasks independently, with language only +used to describe image content. While effective, these models have fixed +interfaces with limited interactivity and adaptability to user +instructions. On the other hand, large language models (LLMs) have shown +promise as a universal interface for general-purpose assistants. By +explicitly representing various task instructions in language, these +models can be guided to switch and solve different tasks. 
To extend this +capability to the multimodal domain, the `LLaVA +paper `__ introduces \`visual +instruction-tuning, a novel approach to building a general-purpose +visual assistant. + +In this tutorial we consider how to use LLaVA model to build multimodal +chatbot using `Optimum +Intel `__. For +demonstration purposes we will use +`LLaVA-1.5-7B `__ model for conversion, +similar steps required to run other models from `LLaVA Model +Zoo `__. + +The tutorial consists from following steps: + +- Install prerequisites +- Convert model to OpenVINO Intermediate Representation format using + Optimum Intel +- Compress model weights to 4 and 8 bits using NNCF +- Prepare OpenVINO-based inference pipeline +- Run OpenVINO model + + +**Table of contents:** + + +- `About model <#about-model>`__ +- `Prerequisites <#prerequisites>`__ +- `Convert and Optimize Model <#convert-and-optimize-model>`__ + + - `Convert model to OpenVINO IR format using Optimum + CLI <#convert-model-to-openvino-ir-format-using-optimum-cli>`__ + - `Compress Model weights to 4 and 8 bits using + NNCF <#compress-model-weights-to-4-and-8-bits-using-nncf>`__ + +- `Prepare OpenVINO based inference + pipeline <#prepare-openvino-based-inference-pipeline>`__ +- `Run model inference <#run-model-inference>`__ + + - `Select inference device <#select-inference-device>`__ + - `Select model variant <#select-model-variant>`__ + - `Load OpenVINO model <#load-openvino-model>`__ + - `Prepare input data <#prepare-input-data>`__ + - `Test model inference <#test-model-inference>`__ + +- `Interactive demo <#interactive-demo>`__ + +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a self-contained example that relies solely on its own code. + +We recommend running the notebook in a virtual environment. You only +need a Jupyter server to start. For details, please refer to +`Installation +Guide `__. + +About model +----------- + + + +LLaVA connects pre-trained `CLIP +ViT-L/14 `__ visual encoder and large +language model like Vicuna, LLaMa v2 or MPT, using a simple projection +matrix + +.. figure:: https://llava-vl.github.io/images/llava_arch.png + :alt: vlp_matrix.png + + vlp_matrix.png + +Model training procedure consists of 2 stages: + +- Stage 1: Pre-training for Feature Alignment. Only the projection + matrix is updated, based on a subset of CC3M. +- Stage 2: Fine-tuning End-to-End.. Both the projection matrix and LLM + are updated for two different use scenarios: + + - Visual Chat: LLaVA is fine-tuned on our generated multimodal + instruction-following data for daily user-oriented applications. + - Science QA: LLaVA is fine-tuned on this multimodal reasoning + dataset for the science domain. + +More details about model can be found in original `project +web-page `__, +`paper `__ and +`repo `__. + +Prerequisites +------------- + + + +Install required dependencies + +.. code:: ipython3 + + %pip install -q "torch>=2.1.0" "torchvision" "torchaudio" --index-url https://download.pytorch.org/whl/cpu + %pip install -q "git+https://github.com/eaidova/optimum-intel.git@ea/minicpmv" + %pip install -q "nncf>=2.13.0" "sentencepiece" "tokenizers>=0.12.1" "transformers>=4.45.0" "gradio>=4.36" + %pip install -q -U --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly openvino_tokenizers openvino openvino-genai + +Convert and Optimize Model +-------------------------- + + + +Our model conversion and optimization consist of following steps: 1. +Download original PyTorch model. 2. Convert model to OpenVINO format. 3. 
+Compress model weights using NNCF. + +Let’s consider each step more deeply. + +Convert model to OpenVINO IR format using Optimum CLI +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +OpenVINO supports PyTorch models via conversion to OpenVINO Intermediate +Representation format. For convenience, we will use OpenVINO integration +with HuggingFace Optimum. `Optimum +Intel `__ is the +interface between the Transformers and Diffusers libraries and the +different tools and libraries provided by Intel to accelerate end-to-end +pipelines on Intel architectures. + +Among other use cases, Optimum Intel provides a simple interface to +optimize your Transformers and Diffusers models, convert them to the +OpenVINO Intermediate Representation (IR) format and run inference using +OpenVINO Runtime. ``optimum-cli`` provides command line interface for +model conversion and optimization. + +General command format: + +.. code:: bash + + optimum-cli export openvino --model --task + +where task is task to export the model for, if not specified, the task +will be auto-inferred based on the model. You can find a mapping between +tasks and model classes in Optimum TaskManager +`documentation `__. +Additionally, you can specify weights compression using +``--weight-format`` argument with one of following options: ``fp32``, +``fp16``, ``int8`` and ``int4``. Fro int8 and int4 +`nncf `__ will be used for +weight compression. More details about model export provided in `Optimum +Intel +documentation `__. + +.. code:: ipython3 + + from pathlib import Path + + model_id = "llava-hf/llava-1.5-7b-hf" + model_path = Path(model_id.split("/")[-1]) / "FP16" + + if not model_path.exists(): + !optimum-cli export openvino --model {model_id} --weight-format fp16 {model_path} + +Compress Model weights to 4 and 8 bits using NNCF +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +For reducing memory consumption, weights compression optimization can be +applied using `NNCF `__. Weight +compression aims to reduce the memory footprint of a model. It can also +lead to significant performance improvement for large memory-bound +models, such as Large Language Models (LLMs). LLMs and other models, +which require extensive memory to store the weights during inference, +can benefit from weight compression in the following ways: + +- enabling the inference of exceptionally large models that cannot be + accommodated in the memory of the device; + +- improving the inference performance of the models by reducing the + latency of the memory access when computing the operations with + weights, for example, Linear layers. + +`Neural Network Compression Framework +(NNCF) `__ provides 4-bit / +8-bit mixed weight quantization as a compression method primarily +designed to optimize LLMs. The main difference between weights +compression and full model quantization (post-training quantization) is +that activations remain floating-point in the case of weights +compression which leads to a better accuracy. Weight compression for +LLMs provides a solid inference performance improvement which is on par +with the performance of the full model quantization. In addition, weight +compression is data-free and does not require a calibration dataset, +making it easy to use. + +``nncf.compress_weights`` function can be used for performing weights +compression. The function accepts an OpenVINO model and other +compression parameters. 
Compared to INT8 compression, INT4 compression +improves performance even more, but introduces a minor drop in +prediction quality. + +More details about weights compression, can be found in `OpenVINO +documentation `__. + +.. code:: ipython3 + + import ipywidgets as widgets + + compression_mode = widgets.Dropdown( + options=["INT4", "INT8"], + value="INT4", + description="Compression mode:", + disabled=False, + ) + + compression_mode + + + + +.. parsed-literal:: + + Dropdown(description='Compression mode:', options=('INT4', 'INT8'), value='INT4') + + + +.. code:: ipython3 + + import shutil + import nncf + import openvino as ov + import gc + + core = ov.Core() + + + def compress_model_weights(precision): + int4_compression_config = {"mode": nncf.CompressWeightsMode.INT4_ASYM, "group_size": 128, "ratio": 1, "all_layers": True} + int8_compression_config = {"mode": nncf.CompressWeightsMode.INT8_ASYM} + + compressed_model_path = model_path.parent / precision + + if not compressed_model_path.exists(): + ov_model = core.read_model(model_path / "openvino_language_model.xml") + compression_config = int4_compression_config if precision == "INT4" else int8_compression_config + compressed_ov_model = nncf.compress_weights(ov_model, **compression_config) + ov.save_model(compressed_ov_model, compressed_model_path / "openvino_language_model.xml") + del compressed_ov_model + del ov_model + gc.collect() + for file_name in model_path.glob("*"): + if file_name.name in ["openvino_language_model.xml", "openvino_language_model.bin"]: + continue + shutil.copy(file_name, compressed_model_path) + + + compress_model_weights(compression_mode.value) + + +.. parsed-literal:: + + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino + + +Prepare OpenVINO based inference pipeline +----------------------------------------- + + + +OpenVINO integration with Optimum Intel provides ready-to-use API for +model inference that can be used for smooth integration with +transformers-based solutions. For loading pixtral model, we will use +``OVModelForVisualCausalLM`` class that have compatible interface with +Transformers LLaVA implementation. For loading a model, +``from_pretrained`` method should be used. It accepts path to the model +directory or model_id from HuggingFace hub (if model is not converted to +OpenVINO format, conversion will be triggered automatically). +Additionally, we can provide an inference device, quantization config +(if model has not been quantized yet) and device-specific OpenVINO +Runtime configuration. More details about model inference with Optimum +Intel can be found in +`documentation `__. + +.. code:: ipython3 + + from optimum.intel.openvino import OVModelForVisualCausalLM + +Run model inference +------------------- + + + +Now, when we have model and defined generation pipeline, we can run +model inference. + +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~ + + + +Select device from dropdown list for running inference using OpenVINO. + +.. code:: ipython3 + + import requests + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) + + from notebook_utils import device_widget + + device = device_widget(exclude=["NPU"]) + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + + + +Select model variant +~~~~~~~~~~~~~~~~~~~~ + + + +.. 
code:: ipython3 + + model_base_path = model_path.parent + available_models = [] + + for precision in ["INT4", "INT8", "FP16"]: + if (model_base_path / precision).exists(): + available_models.append(precision) + + model_variant = widgets.Dropdown( + options=available_models, + value=available_models[0], + description="Compression mode:", + disabled=False, + ) + + model_variant + + + + +.. parsed-literal:: + + Dropdown(description='Compression mode:', options=('INT4', 'FP16'), value='INT4') + + + +Load OpenVINO model +~~~~~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + ov_model = OVModelForVisualCausalLM.from_pretrained(model_base_path / model_variant.value, device=device.value) + +Prepare input data +~~~~~~~~~~~~~~~~~~ + + + +For preparing input data, we will use tokenizer and image processor +defined in the begging of our tutorial. For alignment with original +PyTorch implementation we will use PyTorch tensors as input. + +.. code:: ipython3 + + import requests + from PIL import Image + from io import BytesIO + from transformers import AutoProcessor, AutoConfig + + config = AutoConfig.from_pretrained(model_path) + + processor = AutoProcessor.from_pretrained( + model_path, patch_size=config.vision_config.patch_size, vision_feature_select_strategy=config.vision_feature_select_strategy + ) + + + def load_image(image_file): + if image_file.startswith("http") or image_file.startswith("https"): + response = requests.get(image_file) + image = Image.open(BytesIO(response.content)).convert("RGB") + else: + image = Image.open(image_file).convert("RGB") + return image + + + image_file = "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11" + text_message = "What is unusual on this image?" + + image = load_image(image_file) + + conversation = [ + { + "role": "user", + "content": [ + {"type": "text", "text": text_message}, + {"type": "image"}, + ], + }, + ] + + prompt = processor.apply_chat_template(conversation, add_generation_prompt=True) + + inputs = processor(images=image, text=prompt, return_tensors="pt") + +Test model inference +~~~~~~~~~~~~~~~~~~~~ + + + +Generation process for long response maybe time consuming, for accessing +partial result as soon as it is generated without waiting when whole +process finished, Streaming API can be used. Token streaming is the mode +in which the generative system returns the tokens one by one as the +model generates them. This enables showing progressive generations to +the user rather than waiting for the whole generation. Streaming is an +essential aspect of the end-user experience as it reduces latency, one +of the most critical aspects of a smooth experience. You can find more +details about how streaming work in `HuggingFace +documentation `__. + +Also for simplification of preparing input in conversational mode, we +will use Conversation Template helper provided by model authors for +accumulating history of provided messages and images. + +.. code:: ipython3 + + from transformers import TextStreamer + + # Prepare + streamer = TextStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True) + display(image) + print(f"Question: {text_message}") + print("Answer:") + + output_ids = ov_model.generate( + **inputs, + do_sample=False, + max_new_tokens=50, + streamer=streamer, + ) + + + +.. image:: llava-multimodal-chatbot-optimum-with-output_files/llava-multimodal-chatbot-optimum-with-output_20_0.png + + +.. parsed-literal:: + + Question: What is unusual on this image? 
+ Answer: + The unusual aspect of this image is that a cat is lying inside a cardboard box, which is not a typical place for a cat to rest. Cats are known for their curiosity and love for small, enclosed spaces, but in this case + + +Interactive demo +---------------- + + + +.. code:: ipython3 + + if not Path("gradio_helper.py").exists(): + r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/llava-multimodal-chatbot/gradio_helper.py") + open("gradio_helper.py", "w").write(r.text) + + from gradio_helper import make_demo_llava_optimum + + demo = make_demo_llava_optimum(ov_model, processor) + + try: + demo.launch(debug=False) + except Exception: + demo.launch(share=True, debug=False) + # if you are launching remotely, specify server_name and server_port + # demo.launch(server_name='your server name', server_port='server port in int') + # Read more in the docs: https://gradio.app/docs/ diff --git a/docs/notebooks/llava-multimodal-chatbot-optimum-with-output_files/llava-multimodal-chatbot-optimum-with-output_20_0.jpg b/docs/notebooks/llava-multimodal-chatbot-optimum-with-output_files/llava-multimodal-chatbot-optimum-with-output_20_0.jpg new file mode 100644 index 00000000000000..c6aeec77cd3cb2 --- /dev/null +++ b/docs/notebooks/llava-multimodal-chatbot-optimum-with-output_files/llava-multimodal-chatbot-optimum-with-output_20_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fc0d22d75f23474fb4f8aec8c0bf0fdf5d9377f3379e82a3887003e6da47e7e +size 60425 diff --git a/docs/notebooks/llava-multimodal-chatbot-optimum-with-output_files/llava-multimodal-chatbot-optimum-with-output_20_0.png b/docs/notebooks/llava-multimodal-chatbot-optimum-with-output_files/llava-multimodal-chatbot-optimum-with-output_20_0.png new file mode 100644 index 00000000000000..c6673a757ab5dc --- /dev/null +++ b/docs/notebooks/llava-multimodal-chatbot-optimum-with-output_files/llava-multimodal-chatbot-optimum-with-output_20_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c715d8adee4bf7519690de20b57ef2edaa2f914c86a64d107f99a919dcdad218 +size 854224 diff --git a/docs/notebooks/llava-multimodal-chatbot-with-output.rst b/docs/notebooks/llava-multimodal-chatbot-with-output.rst deleted file mode 100644 index fde37625041d43..00000000000000 --- a/docs/notebooks/llava-multimodal-chatbot-with-output.rst +++ /dev/null @@ -1,1342 +0,0 @@ -Visual-language assistant with LLaVA and OpenVINO -================================================= - -`LLaVA `__ (Large Language and Vision -Assistant) is large multimodal model that aims to develop a -general-purpose visual assistant that can follow both language and image -instructions to complete various real-world tasks. The idea is to -combine the power of large language models (LLMs) with vision encoders -like CLIP to create an end-to-end trained neural assistant that -understands and acts upon multimodal instructions. - -In the field of artificial intelligence, the goal is to create a -versatile assistant capable of understanding and executing tasks based -on both visual and language inputs. Current approaches often rely on -large vision models that solve tasks independently, with language only -used to describe image content. While effective, these models have fixed -interfaces with limited interactivity and adaptability to user -instructions. On the other hand, large language models (LLMs) have shown -promise as a universal interface for general-purpose assistants. 
By -explicitly representing various task instructions in language, these -models can be guided to switch and solve different tasks. To extend this -capability to the multimodal domain, the `LLaVA -paper `__ introduces \`visual -instruction-tuning, a novel approach to building a general-purpose -visual assistant. - -In this tutorial we consider how to use LLaVA model to build multimodal -chatbot. For demonstration purposes we will use -`LLaVA-Lightning-MPT-7B-preview `__ -model for conversion, similar steps required to run other models from -`LLaVA Model -Zoo `__. - -The tutorial consists from following steps: - -- Install prerequisites -- Prepare input processor and tokenizer -- Download original model -- Compress model weights to 4 and 8 bits using NNCF -- Convert model to OpenVINO Intermediate Representation (IR) format -- Prepare OpenVINO-based inference pipeline -- Run OpenVINO model - - -**Table of contents:** - - -- `About model <#about-model>`__ -- `Prerequisites <#prerequisites>`__ -- `Build model tokenizer and image - processor <#build-model-tokenizer-and-image-processor>`__ -- `Build model and convert it to OpenVINO IR - format <#build-model-and-convert-it-to-openvino-ir-format>`__ - - - `Prepare helpers for model - conversion <#prepare-helpers-for-model-conversion>`__ - - `Convert and Optimize Model <#convert-and-optimize-model>`__ - - - `Instantiate PyTorch model <#instantiate-pytorch-model>`__ - - `Compress Model weights to 4 and 8 bits using - NNCF <#compress-model-weights-to-4-and-8-bits-using-nncf>`__ - - `Convert model to OpenVINO IR - format <#convert-model-to-openvino-ir-format>`__ - -- `Prepare OpenVINO based inference - pipeline <#prepare-openvino-based-inference-pipeline>`__ -- `Run model inference <#run-model-inference>`__ - - - `Select inference device <#select-inference-device>`__ - - `Load OpenVINO model <#load-openvino-model>`__ - - `Prepare input data <#prepare-input-data>`__ - - `Test model inference <#test-model-inference>`__ - -- `Interactive demo <#interactive-demo>`__ - -Installation Instructions -~~~~~~~~~~~~~~~~~~~~~~~~~ - -This is a self-contained example that relies solely on its own code. - -We recommend running the notebook in a virtual environment. You only -need a Jupyter server to start. For details, please refer to -`Installation -Guide `__. - -About model ------------ - - - -LLaVA connects pre-trained `CLIP -ViT-L/14 `__ visual encoder and large -language model like Vicuna, LLaMa v2 or MPT, using a simple projection -matrix - -.. figure:: https://llava-vl.github.io/images/llava_arch.png - :alt: vlp_matrix.png - - vlp_matrix.png - -Model training procedure consists of 2 stages: - -- Stage 1: Pre-training for Feature Alignment. Only the projection - matrix is updated, based on a subset of CC3M. -- Stage 2: Fine-tuning End-to-End.. Both the projection matrix and LLM - are updated for two different use scenarios: - - - Visual Chat: LLaVA is fine-tuned on our generated multimodal - instruction-following data for daily user-oriented applications. - - Science QA: LLaVA is fine-tuned on this multimodal reasoning - dataset for the science domain. - -More details about model can be found in original `project -web-page `__, -`paper `__ and -`repo `__. - -Prerequisites -------------- - - - -Install required dependencies - -.. 
code:: ipython3 - - import sys - - %pip install -q "torch>=2.1.0" "torchvision" "torchaudio" --index-url https://download.pytorch.org/whl/cpu - %pip install -q "openvino>=2023.2.0" "nncf>=2.7.0" "sentencepiece" "tokenizers>=0.12.1" "transformers>=4.37.2" "gradio>=4.19" "einops" - -.. code:: ipython3 - - from pathlib import Path - - repo_dir = Path("LLaVA") - - if not repo_dir.exists(): - !git clone https://github.com/haotian-liu/LLaVA.git - - sys.path.insert(0, str(repo_dir.resolve())) - -Build model tokenizer and image processor ------------------------------------------ - - - -For starting work with model, we need understand how to prepare input -data first. As it is already discussed before, LLaVA is multimodal model -that accepts input user instructions in text format and image for -analysis. In the same time, LLaVA is combination of 2 fundamental -pretrained models for text and image processing, CLIP and MPT, each of -them has own approach for preparing data - tokenization for input text -and preprocessing for input image. LLaVA reuses these steps with small -adoption: introduced special tokens that serves for specification of -image location in the text that should be injected in provided user -instruction. - -.. code:: ipython3 - - from transformers import AutoTokenizer, AutoConfig, CLIPImageProcessor - from llava.model.language_model.llava_mpt import LlavaMptForCausalLM - - model_id = "liuhaotian/LLaVA-Lightning-MPT-7B-preview" - - config = AutoConfig.from_pretrained(model_id) - tokenizer = AutoTokenizer.from_pretrained(model_id) - image_processor = CLIPImageProcessor.from_pretrained(config.mm_vision_tower) - - -.. parsed-literal:: - - Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. - - -.. code:: ipython3 - - from llava.constants import ( - DEFAULT_IMAGE_PATCH_TOKEN, - DEFAULT_IM_START_TOKEN, - DEFAULT_IM_END_TOKEN, - DEFAULT_IMAGE_TOKEN, - ) - - mm_use_im_start_end = getattr(config, "mm_use_im_start_end", False) - mm_use_im_patch_token = getattr(config, "mm_use_im_patch_token", True) - if mm_use_im_patch_token: - tokenizer.add_tokens([DEFAULT_IMAGE_PATCH_TOKEN], special_tokens=True) - if mm_use_im_start_end: - tokenizer.add_tokens([DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN], special_tokens=True) - - if hasattr(config, "max_sequence_length"): - context_len = config.max_sequence_length - else: - context_len = 2048 - -Build model and convert it to OpenVINO IR format ------------------------------------------------- - - - -LLaVA is autoregressive transformer generative model, it means that each -next model step depends from model output from previous step. The -generation approach is based on the assumption that the probability -distribution of a word sequence can be decomposed into the product of -conditional next word distributions. In other words, model predicts the -next token in the loop guided by previously generated tokens until the -stop-condition will be not reached (generated sequence of maximum length -or end of string token obtained). The way the next token will be -selected over predicted probabilities is driven by the selected decoding -methodology. You can find more information about the most popular -decoding methods in this -`blog `__. The entry point -for the generation process for models from the Hugging Face Transformers -library is the ``generate`` method. You can find more information about -its parameters and configuration in the -`documentation `__. 
-To preserve flexibility in the selection decoding methodology, we will -convert only model inference for one step. - -The inference flow has difference on first step and for the next. On the -first step, model accept preprocessed input instruction and image, that -transformed to the unified embedding space using ``token_embedding`` and -``image_encoder`` models, after that LLM-based part of model runs on -input embeddings to predict probability of next generated tokens. On the -next step, model accepts only next token id selected based on sampling -strategy and cached attention key and values. Since the output side is -auto-regressive, an output token hidden state remains the same once -computed for every further generation step. Therefore, recomputing it -every time you want to generate a new token seems wasteful. With the -cache, the model saves the hidden state once it has been computed. The -model only computes the one for the most recently generated output token -at each time step, re-using the saved ones for hidden tokens. This -reduces the generation complexity from :math:`O(n^3)` to :math:`O(n^2)` -for a transformer model. More details about how it works can be found in -this -`article `__. - -Prepare helpers for model conversion -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -The code below prepares function for converting LLaVA model to OpenVINO -Intermediate Representation format. It splits model on parts described -above, prepare example inputs for each part and convert each part using -`OpenVINO Model Conversion -API `__. -``ov.convert_model`` function accepts PyTorch model instance and returns -``ov.Model`` object that represent model in OpenVINO format. It is ready -to use for loading on device using ``ov.compile_model`` or can be saved -on disk using ``ov.save_model``. - -.. code:: ipython3 - - from functools import wraps - import gc - import warnings - import torch - import openvino as ov - import nncf - from typing import Optional, Tuple, List - import torch.nn.functional as F - - warnings.filterwarnings("ignore") - - - class ModelWrapper(torch.nn.Module): - """ - Model wrapper class for export for spliting original forward logic on preparing multimodal data and inference using it. - That allows us to sperate image encoder and token embeddings model from general flow. - """ - - def __init__(self, model): - super().__init__() - self.model = model - - def forward( - self, - input_ids: torch.LongTensor = None, - past_key_values: Optional[List[torch.FloatTensor]] = None, - inputs_embeds: Optional[torch.FloatTensor] = None, - attention_mask: Optional[torch.Tensor] = None, - ): - outputs = self.model.transformer( - input_ids=input_ids, - inputs_embeds=inputs_embeds, - past_key_values=past_key_values, - attention_mask=attention_mask, - return_dict=True, - output_attentions=False, - output_hidden_states=False, - use_cache=True, - ) - logits = F.linear( - outputs.last_hidden_state.to(self.model.transformer.wte.weight.device), - self.model.transformer.wte.weight.to(outputs.last_hidden_state.dtype), - ) - - return (logits, tuple(outputs.past_key_values)) - - - def patch_model_forward(model): - """ - Helper function for patching model forward for model with past. 
- It makes model more convinient for export to TorchScript format avoiding limitation - that list of tensors can not be correctly traced as model input - """ - - orig_forward = model.forward - - @wraps(orig_forward) - def ts_patched_forward( - input_ids: torch.Tensor, - past_key_values: Tuple[Tuple[torch.Tensor]], - attention_mask: torch.LongTensor, - ): - pkv_list = list(past_key_values) - outs = orig_forward( - input_ids=input_ids, - past_key_values=pkv_list, - attention_mask=attention_mask, - ) - return outs - - model.forward = ts_patched_forward - return model - - - def flattenize_inputs(inputs): - """ - Helper function for making nested inputs flattens - """ - flatten_inputs = [] - for input_data in inputs: - if input_data is None: - continue - if isinstance(input_data, (list, tuple)): - flatten_inputs.extend(flattenize_inputs(input_data)) - else: - flatten_inputs.append(input_data) - return flatten_inputs - - - def cleanup_torchscript_cache(): - """ - Helper for removing cached model representation - """ - torch._C._jit_clear_class_registry() - torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore() - torch.jit._state._clear_class_state() - - - def postprocess_converted_model( - ov_model, - example_input=None, - input_names=None, - output_names=None, - dynamic_shapes=None, - ): - """ - Helper function for appling postprocessing on converted model with updating input names, shapes and output names - acording to requested specification - """ - flatten_example_inputs = flattenize_inputs(example_input) if example_input else [] - - if input_names: - for inp_name, m_input, input_data in zip(input_names, ov_model.inputs, flatten_example_inputs): - input_node = m_input.get_node() - if input_node.element_type == ov.Type.dynamic: - m_input.get_node().set_element_type(ov.Type.f32) - shape = list(input_data.shape) - if dynamic_shapes is not None and inp_name in dynamic_shapes: - for k in dynamic_shapes[inp_name]: - shape[k] = -1 - input_node.set_partial_shape(ov.PartialShape(shape)) - m_input.get_tensor().set_names({inp_name}) - - if output_names: - for out, out_name in zip(ov_model.outputs, output_names): - out.get_tensor().set_names({out_name}) - ov_model.validate_nodes_and_infer_types() - return ov_model - - - def convert_llava_mpt( - pt_model: torch.nn.Module, - model_path: Path, - image_encoder_wc_parameters: Optional[dict] = None, - llava_wc_parameters: Optional[dict] = None, - ): - """ - LLaVA MPT model conversion function - - Params: - pt_model: PyTorch model - model_path: path for saving model - Returns: - None - """ - ov_out_path = Path(model_path) - pt_model.config.save_pretrained(ov_out_path) - pt_model.config.use_cache = True - pt_model.config.torchscript = True - first_stage_model_path = ov_out_path / "llava_input_embed.xml" - image_encoder_path = ov_out_path / "image_encoder.xml" - token_embedding_model_path = ov_out_path / "token_embed.xml" - second_stage_model_path = ov_out_path / "llava_with_past.xml" - if not image_encoder_path.exists(): - model.forward = model.encode_images - ov_model = ov.convert_model( - model, - example_input=torch.zeros((1, 3, 224, 224)), - input=[(-1, 3, 224, 224)], - ) - if image_encoder_wc_parameters is not None: - print("Applying weight compression to image encoder") - ov_model = nncf.compress_weights(ov_model, **image_encoder_wc_parameters) - ov.save_model(ov_model, image_encoder_path) - cleanup_torchscript_cache() - del ov_model - gc.collect() - print("Image Encoder model successfully converted") - - if not 
token_embedding_model_path.exists(): - model.forward = model.get_model().embed_tokens - ov_model = ov.convert_model(model, example_input=torch.ones((1, 10), dtype=torch.long)) - ov.save_model(ov_model, token_embedding_model_path) - cleanup_torchscript_cache() - del ov_model - gc.collect() - print("Token Embedding model successfully converted") - - if first_stage_model_path.exists() and second_stage_model_path.exists(): - print("LLaVA model successfully converted") - del pt_model - return - model_wrap = ModelWrapper(model) - example_input_first_stage = { - "inputs_embeds": torch.zeros((1, 307, 4096)), - "attention_mask": torch.ones((1, 307), dtype=torch.long), - } - outs = model_wrap(**example_input_first_stage) - inputs = ["input_ids"] - outputs = ["logits"] - dynamic_shapes = {"input_ids": {1: "seq_len"}, "attention_mask": {1: "seq_len"}} - for idx in range(len(outs[1])): - inputs.extend([f"past_key_values.{idx}.key", f"past_key_values.{idx}.value"]) - dynamic_shapes[inputs[-1]] = {2: "past_sequence + sequence"} - dynamic_shapes[inputs[-2]] = {2: "past_sequence + sequence"} - outputs.extend([f"present.{idx}.key", f"present.{idx}.value"]) - - inputs.extend(["attention_mask"]) - if not first_stage_model_path.exists(): - ov_model = ov.convert_model(model_wrap, example_input=example_input_first_stage) - ov_model = postprocess_converted_model(ov_model, output_names=outputs) - if llava_wc_parameters is not None: - print("Applying weight compression to first stage LLava model") - ov_model = nncf.compress_weights(ov_model, **llava_wc_parameters) - ov.save_model(ov_model, first_stage_model_path) - cleanup_torchscript_cache() - del ov_model - gc.collect() - - if not second_stage_model_path.exists(): - model_wrap = patch_model_forward(model_wrap) - example_input_second_stage = { - "input_ids": torch.ones((1, 1), dtype=torch.long), - "past_key_values": outs[1], - "attention_mask": torch.ones((1, outs[1][-1][-1].shape[-2] + 1), dtype=torch.long), - } - ov_model = ov.convert_model(model_wrap, example_input=example_input_second_stage) - ov_model = postprocess_converted_model( - ov_model, - example_input=example_input_second_stage.values(), - input_names=inputs, - output_names=outputs, - dynamic_shapes=dynamic_shapes, - ) - if llava_wc_parameters is not None: - print("Applying weight compression to second stage LLava model") - ov_model = nncf.compress_weights(ov_model, **llava_wc_parameters) - ov.save_model(ov_model, second_stage_model_path) - cleanup_torchscript_cache() - del ov_model - gc.collect() - print("LLaVA model successfully converted") - del model_wrap - del pt_model - - -.. parsed-literal:: - - INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino - - -Convert and Optimize Model -~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -Our model conversion and optimization consist of following steps: 1. -Download original PyTorch model. 2. Compress model weights using NNCF 3. -Convert model to OpenVINO format and save it on disk. - -Let’s consider each step more deeply. - -Instantiate PyTorch model -^^^^^^^^^^^^^^^^^^^^^^^^^ - - - -For creating PyTorch model we should use ``from_pretrained`` method of -``LlavaMPTForCausalLM`` model class. Model weights will be downloaded -from `HuggingFace hub `__ during first -run. It may takes some time and requires at least 13 Gb free space on -disk. 
- -Compress Model weights to 4 and 8 bits using NNCF -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - - -For reducing memory consumption, weights compression optimization can be -applied using `NNCF `__. Weight -compression aims to reduce the memory footprint of a model. It can also -lead to significant performance improvement for large memory-bound -models, such as Large Language Models (LLMs). LLMs and other models, -which require extensive memory to store the weights during inference, -can benefit from weight compression in the following ways: - -- enabling the inference of exceptionally large models that cannot be - accommodated in the memory of the device; - -- improving the inference performance of the models by reducing the - latency of the memory access when computing the operations with - weights, for example, Linear layers. - -`Neural Network Compression Framework -(NNCF) `__ provides 4-bit / -8-bit mixed weight quantization as a compression method primarily -designed to optimize LLMs. The main difference between weights -compression and full model quantization (post-training quantization) is -that activations remain floating-point in the case of weights -compression which leads to a better accuracy. Weight compression for -LLMs provides a solid inference performance improvement which is on par -with the performance of the full model quantization. In addition, weight -compression is data-free and does not require a calibration dataset, -making it easy to use. - -``nncf.compress_weights`` function can be used for performing weights -compression. The function accepts an OpenVINO model and other -compression parameters. Compared to INT8 compression, INT4 compression -improves performance even more, but introduces a minor drop in -prediction quality. - -More details about weights compression, can be found in `OpenVINO -documentation `__. - - **Note**: There is no speedup for INT4 compressed models on dGPU. - -Convert model to OpenVINO IR format -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - - -Convert model to OpenVINO format using conversion helper function -defined above. - -Please select below whether you would like to run INT4 weight -compression instead of INT8 weight compression. - -.. code:: ipython3 - - import ipywidgets as widgets - - compression_mode = widgets.Dropdown( - options=["INT4", "INT8"], - value="INT4", - description="Compression mode:", - disabled=False, - ) - - compression_mode - - - - -.. parsed-literal:: - - Dropdown(description='Compression mode:', options=('INT4', 'INT8'), value='INT4') - - - -.. 
code:: ipython3 - - if compression_mode.value == "INT4": - compressed_model_dir = Path("llava-mpt/INT4_compressed_weights") - llava_wc_parameters = dict(mode=nncf.CompressWeightsMode.INT4_ASYM, group_size=128, ratio=0.8) - else: - compressed_model_dir = Path("llava-mpt/INT8_compressed_weights") - llava_wc_parameters = dict(mode=nncf.CompressWeightsMode.INT8) - - if not compressed_model_dir.exists(): - compressed_model_dir.mkdir(exist_ok=True, parents=True) - config.save_pretrained(compressed_model_dir) - model = LlavaMptForCausalLM.from_pretrained(model_id) - vision_tower = model.get_vision_tower() - if not vision_tower.is_loaded: - vision_tower.load_model() - - if mm_use_im_start_end: - model.resize_token_embeddings(len(tokenizer)) - - model.eval() - with torch.no_grad(): - convert_llava_mpt( - model, - compressed_model_dir, - image_encoder_wc_parameters=dict(mode=nncf.CompressWeightsMode.INT8), - llava_wc_parameters=llava_wc_parameters, - ) - del model - gc.collect(); - - - -.. parsed-literal:: - - Loading checkpoint shards: 0%| | 0/2 [00:00`__. - -.. code:: ipython3 - - from transformers.generation import GenerationConfig, GenerationMixin - from transformers.modeling_outputs import CausalLMOutputWithPast - from transformers import AutoConfig - import numpy as np - import torch - - - class OVLlavaMPTForCausalLM(GenerationMixin): - def __init__(self, core, model_dir, device): - self.image_encoder = core.compile_model(model_dir / "image_encoder.xml", device) - self.token_embed = core.compile_model(model_dir / "token_embed.xml", device) - self.model = core.read_model(model_dir / "llava_with_past.xml") - self.model_input_embed = core.compile_model(model_dir / "llava_input_embed.xml", device) - self.input_names = {key.get_any_name(): idx for idx, key in enumerate(self.model.inputs)} - self.output_names = {key.get_any_name(): idx for idx, key in enumerate(self.model.outputs)} - self.key_value_input_names = [key for key in self.input_names if "key_values" in key] - self.key_value_output_names = [key for key in self.output_names if "present" in key] - compiled_model = core.compile_model(self.model, device) - self.request = compiled_model.create_infer_request() - self.config = AutoConfig.from_pretrained(model_dir) - self.generation_config = GenerationConfig.from_model_config(config) - self.main_input_name = "input_ids" - self.device = torch.device("cpu") - self.num_pkv = 2 - self._supports_cache_class = False - - def can_generate(self): - """Returns True to validate the check that the model using `GenerationMixin.generate()` can indeed generate.""" - return True - - def __call__( - self, - input_ids: torch.LongTensor, - images: torch.Tensor, - attention_mask: Optional[torch.LongTensor] = None, - prefix_mask: Optional[torch.LongTensor] = None, - past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, - **kwargs, - ) -> CausalLMOutputWithPast: - return self.forward(input_ids, images, attention_mask, prefix_mask, past_key_values) - - def forward( - self, - input_ids: torch.LongTensor, - images: torch.Tensor, - attention_mask: Optional[torch.LongTensor] = None, - prefix_mask: Optional[torch.LongTensor] = None, - past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, - **kwargs, - ) -> CausalLMOutputWithPast: - """General inference method""" - inputs = {} - if past_key_values is not None: - # Flatten the past_key_values - attention_mask = torch.ones( - (input_ids.shape[0], past_key_values[-1][-1].shape[-2] + 1), - dtype=input_ids.dtype, - ) - past_key_values = 
tuple(past_key_value for pkv_per_layer in past_key_values for past_key_value in pkv_per_layer) - # Add the past_key_values to the decoder inputs - inputs = dict(zip(self.key_value_input_names, past_key_values)) - - else: - return self.forward_with_image(input_ids, images, attention_mask) - inputs["input_ids"] = np.array(input_ids) - - if "attention_mask" in self.input_names: - inputs["attention_mask"] = np.array(attention_mask) - - # Run inference - self.request.start_async(inputs, share_inputs=True) - self.request.wait() - - logits = torch.from_numpy(self.request.get_tensor("logits").data) - - # Tuple of length equal to : number of layer * number of past_key_value per decoder layer (2 corresponds to the self-attention layer) - past_key_values = tuple(self.request.get_tensor(key).data for key in self.key_value_output_names) - # Tuple of tuple of length `n_layers`, with each tuple of length equal to 2 (k/v of self-attention) - - past_key_values = tuple(past_key_values[i : i + self.num_pkv] for i in range(0, len(past_key_values), self.num_pkv)) - return CausalLMOutputWithPast(logits=logits, past_key_values=past_key_values) - - def forward_with_image(self, input_ids, images, attention_mask): - """First step inference method, that resolves multimodal data""" - input_embed, attention_mask = self.prepare_multimodal_input(input_ids, images, attention_mask) - outs = self.model_input_embed([input_embed, attention_mask]) - logits = outs[0] - pkv = list(outs.values())[1:] - pkv = tuple(pkv[i : i + self.num_pkv] for i in range(0, len(pkv), self.num_pkv)) - return CausalLMOutputWithPast(logits=torch.from_numpy(logits), past_key_values=pkv) - - def prepare_multimodal_input(self, input_ids, images, attention_mask): - """Preprocessing function for embedding multimodal data""" - image_features = [] - if images is not None: - image_features = self.image_encoder(images)[0] - - new_input_embeds = [] - cur_image_idx = 0 - for batch_idx, cur_input_ids in enumerate(input_ids): - if (cur_input_ids == IMAGE_TOKEN_INDEX).sum() == 0: - # multimodal LLM, but the current sample is not multimodal - cur_input_embeds = torch.from_numpy(self.token_embed(cur_input_ids.unsqueeze(0))[0][0]) - new_input_embeds.append(cur_input_embeds) - cur_image_idx += 1 - continue - image_token_indices = torch.where(cur_input_ids == IMAGE_TOKEN_INDEX)[0] - cur_new_input_embeds = [] - while image_token_indices.numel() > 0: - cur_image_features = image_features[cur_image_idx] - image_token_start = image_token_indices[0] - if getattr(self.config, "tune_mm_mlp_adapter", False) and getattr(self.config, "mm_use_im_start_end", False): - embd = self.token_embed(cur_input_ids[: image_token_start - 1].unsqueeze(0))[0][0] - cur_new_input_embeds.append(embd) - embd = self.token_embed(cur_input_ids[image_token_start - 1 : image_token_start].unsqueeze(0))[0][0] - cur_new_input_embeds.append(embd) - cur_new_input_embeds.append(cur_image_features) - embd = self.token_embed(cur_input_ids[image_token_start + 1 : image_token_start + 2].unsqueeze(0))[0][0] - cur_new_input_embeds.append(embd) - else: - cur_new_input_embeds.append(self.token_embed(cur_input_ids[:image_token_start].unsqueeze(0))[0][0]) - cur_new_input_embeds.append(cur_image_features) - cur_image_idx += 1 - if getattr(self.config, "tune_mm_mlp_adapter", False) and getattr(self.config, "mm_use_im_start_end", False): - cur_input_ids = cur_input_ids[image_token_start + 2 :] - else: - cur_input_ids = cur_input_ids[image_token_start + 1 :] - image_token_indices = torch.where(cur_input_ids == 
IMAGE_TOKEN_INDEX)[0] - if cur_input_ids.numel() > 0: - if getattr(self.config, "tune_mm_mlp_adapter", False) and getattr(self.config, "mm_use_im_start_end", False): - cur_new_input_embeds.append(self.token_embed(cur_input_ids.unsqueeze(0))[0][0]) - else: - cur_new_input_embeds.append(self.token_embed(cur_input_ids.unsqueeze(0))[0][0]) - cur_new_input_embeds = [torch.from_numpy(x) for x in cur_new_input_embeds] - cur_new_input_embeds = torch.cat(cur_new_input_embeds, dim=0) - new_input_embeds.append(cur_new_input_embeds) - - if any(x.shape != new_input_embeds[0].shape for x in new_input_embeds): - max_len = max(x.shape[0] for x in new_input_embeds) - - new_input_embeds_align = [] - for cur_new_embed in new_input_embeds: - cur_new_embed = torch.cat( - ( - cur_new_embed, - torch.zeros( - (max_len - cur_new_embed.shape[0], cur_new_embed.shape[1]), - dtype=cur_new_embed.dtype, - ), - ), - dim=0, - ) - new_input_embeds_align.append(cur_new_embed) - new_input_embeds = torch.stack(new_input_embeds_align, dim=0) - - if attention_mask is not None: - new_attention_mask = [] - for cur_attention_mask, cur_new_labels, cur_new_labels_align in zip(attention_mask, _new_labels, new_labels): - new_attn_mask_pad_left = torch.full( - (cur_new_labels.shape[0] - labels.shape[1],), - True, - dtype=attention_mask.dtype, - ) - new_attn_mask_pad_right = torch.full( - (cur_new_labels_align.shape[0] - cur_new_labels.shape[0],), - False, - dtype=attention_mask.dtype, - ) - cur_new_attention_mask = torch.cat( - ( - new_attn_mask_pad_left, - cur_attention_mask, - new_attn_mask_pad_right, - ), - dim=0, - ) - new_attention_mask.append(cur_new_attention_mask) - attention_mask = torch.stack(new_attention_mask, dim=0) - assert attention_mask.shape == new_labels.shape - else: - new_input_embeds = torch.stack(new_input_embeds, dim=0) - - if attention_mask is not None: - new_attn_mask_pad_left = torch.full( - ( - attention_mask.shape[0], - new_input_embeds.shape[1] - input_ids.shape[1], - ), - True, - dtype=attention_mask.dtype, - ) - attention_mask = torch.cat((new_attn_mask_pad_left, attention_mask), dim=1) - assert attention_mask.shape == new_input_embeds.shape[:2] - - return new_input_embeds, attention_mask - - def prepare_inputs_for_generation(self, input_ids, past_key_values=None, **kwargs): - """ - This function is used during running GenerationMixin.generate for preparing model specific inputs for - each generation step - """ - past_len = 0 - if past_key_values is not None: - input_ids = input_ids[:, -1].unsqueeze(-1) - past_len = past_key_values[-1][-1].shape[-2] - attention_mask = kwargs.get( - "attention_mask", - torch.ones(input_ids.shape[0], input_ids.shape[1] + past_len), - ) - if not kwargs.get("use_cache", True): - raise NotImplementedError("MPT with prefix_lm=True does not support use_cache=False.") - else: - prefix_mask = None - return { - "input_ids": input_ids, - "attention_mask": attention_mask, - "prefix_mask": prefix_mask, - "past_key_values": past_key_values, - "images": kwargs.get("images", None), - } - - def _reorder_cache(self, past_key_values: Tuple[Tuple[torch.Tensor]], beam_idx: torch.Tensor) -> Tuple[Tuple[torch.Tensor]]: - """ - This function is used to re-order the `past_key_values` cache if [`~PreTrainedModel.beam_search`] or - [`~PreTrainedModel.beam_sample`] is called. - This is required to match `past_key_values` with the correct beam_idx at every generation step. 
- """ - - # from transformers.models.gpt2.modeling_gpt2.GPT2LMHeadModel._reorder_cache - return tuple(tuple(np.take(past_state, beam_idx, 0) for past_state in layer_past) for layer_past in past_key_values) - -Run model inference -------------------- - - - -Now, when we have model and defined generation pipeline, we can run -model inference. - -Select inference device -~~~~~~~~~~~~~~~~~~~~~~~ - - - -Select device from dropdown list for running inference using OpenVINO. - - **Note**: There is no speedup for INT4 compressed models on dGPU. - -.. code:: ipython3 - - import requests - - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - open("notebook_utils.py", "w").write(r.text) - - from notebook_utils import device_widget - - device = device_widget(exclude=["NPU"]) - - device - - - - -.. parsed-literal:: - - Dropdown(description='Device:', index=3, options=('CPU', 'GPU.0', 'GPU.1', 'AUTO'), value='AUTO') - - - -Load OpenVINO model -~~~~~~~~~~~~~~~~~~~ - - - -.. code:: ipython3 - - core = ov.Core() - - ov_model = OVLlavaMPTForCausalLM(core, compressed_model_dir, device.value) - -Prepare input data -~~~~~~~~~~~~~~~~~~ - - - -For preparing input data, we will use tokenizer and image processor -defined in the begging of our tutorial. For alignment with original -PyTorch implementation we will use PyTorch tensors as input. - -.. code:: ipython3 - - import requests - from PIL import Image - from io import BytesIO - - - def load_image(image_file): - if image_file.startswith("http") or image_file.startswith("https"): - response = requests.get(image_file) - image = Image.open(BytesIO(response.content)).convert("RGB") - else: - image = Image.open(image_file).convert("RGB") - return image - - - image_file = "https://llava-vl.github.io/static/images/view.jpg" - - image = load_image(image_file) - image_tensor = image_processor.preprocess(image, return_tensors="pt")["pixel_values"] - - text_message = "What are the things I should be cautious about when I visit here?" - print(f"Question: {text_message}") - image - - -.. parsed-literal:: - - Question: What are the things I should be cautious about when I visit here? - - - - -.. image:: llava-multimodal-chatbot-with-output_files/llava-multimodal-chatbot-with-output_20_1.png - - - -Test model inference -~~~~~~~~~~~~~~~~~~~~ - - - -Generation process for long response maybe time consuming, for accessing -partial result as soon as it is generated without waiting when whole -process finished, Streaming API can be used. Token streaming is the mode -in which the generative system returns the tokens one by one as the -model generates them. This enables showing progressive generations to -the user rather than waiting for the whole generation. Streaming is an -essential aspect of the end-user experience as it reduces latency, one -of the most critical aspects of a smooth experience. You can find more -details about how streaming work in `HuggingFace -documentation `__. - -Also for simplification of preparing input in conversational mode, we -will use Conversation Template helper provided by model authors for -accumulating history of provided messages and images. - -.. 
code:: ipython3 - - from llava.mm_utils import tokenizer_image_token, KeywordsStoppingCriteria - from llava.constants import IMAGE_TOKEN_INDEX - from transformers import TextStreamer - from llava.conversation import conv_templates, SeparatorStyle - - # Prepare - streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True) - conv_mode = "mpt" - - conv = conv_templates[conv_mode].copy() - roles = ("user", "assistant") - - if mm_use_im_start_end: - inp = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN + "\n" + text_message - else: - inp = DEFAULT_IMAGE_TOKEN + "\n" + text_message - conv.append_message(conv.roles[0], inp) - conv.append_message(conv.roles[1], None) - - prompt = conv.get_prompt() - input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0) - stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2 - keywords = [stop_str] - stopping_criteria = KeywordsStoppingCriteria(keywords, tokenizer, input_ids) - streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True) - print("Answer:") - - output_ids = ov_model.generate( - input_ids, - images=image_tensor, - do_sample=True, - temperature=0.2, - max_new_tokens=1024, - streamer=streamer, - use_cache=True, - stopping_criteria=[stopping_criteria], - ) - - -.. parsed-literal:: - - Answer: - When visiting this location, I should be cautious about the water level and the presence of boats. The image shows a dock with a boat in the water, and the water appears to be relatively shallow. It is essential to be mindful of the water depth when approaching the dock, as it could be dangerous to step into the water without checking the water level. Additionally, I should be aware of the boats in the water, as they could pose a risk if they are not properly secured or if they are not being used as intended. It is crucial to maintain a safe distance from the boats and follow any posted signs or guidelines to ensure a safe and enjoyable experience. - - -Interactive demo ----------------- - - - -.. code:: ipython3 - - from threading import Event, Thread - from transformers import TextIteratorStreamer - - conv = conv_templates[conv_mode].copy() - conv.messages = [] - - - def clear_history(textbox, imagebox, chatbot): - """ - callback function for clearing chat windows in interface on clear button click - - Params: - textbox: current textbox for user messages state - imagebox: current imagebox state - chatbot: current chatbot state - Returns: - empty textbox, imagebox and chatbot states - """ - conv.messages = [] - - return None, None, None - - - def handle_user_message(message, history): - """ - callback function for updating user messages in interface on submit button click - - Params: - message: current message - history: conversation history - Returns: - updated message and conversation history - """ - # Append the user's message to the conversation history - return "", history + [[message, ""]] - - - def run_chatbot(image, history, temperature=0.2, top_p=0.7, max_new_tokens=1024): - """ - callback function for running chatbot on submit button click - - Params: - history: conversation history - temperature: parameter for control the level of creativity in AI-generated text. - By adjusting the `temperature`, you can influence the AI model's probability distribution, making the text more focused or diverse. - top_p: parameter for control the range of tokens considered by the AI model based on their cumulative probability. 
- - """ - - text = history[-1][0] - if len(text) <= 0 and image is None: - conv.skip_next = True - yield history - text = text[:1536] # Hard cut-off - if image is not None: - text = text[:1200] # Hard cut-off for images - if "" not in text: - text = text + "\n" - text = (text, image, "Resize") - conv.append_message(conv.roles[0], text) - conv.append_message(conv.roles[1], None) - conv.skip_next = False - - # Construct the input message string for the model by concatenating the current system message and conversation history - prompt = conv.get_prompt() - image = conv.get_images(return_pil=True) - if not image: - image_tensor = None - else: - image_tensor = image_processor.preprocess(image, return_tensors="pt")["pixel_values"] - input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0) - stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2 - keywords = [stop_str] - stopping_criteria = KeywordsStoppingCriteria(keywords, tokenizer, input_ids) - # Tokenize the messages string - streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True) - generate_kwargs = dict( - input_ids=input_ids, - images=image_tensor, - max_new_tokens=max_new_tokens, - temperature=temperature, - do_sample=temperature > 0.001, - top_p=top_p, - streamer=streamer, - use_cache=True, - stopping_criteria=[stopping_criteria], - ) - - stream_complete = Event() - - def generate_and_signal_complete(): - """ - genration function for single thread - """ - ov_model.generate(**generate_kwargs) - stream_complete.set() - - t1 = Thread(target=generate_and_signal_complete) - t1.start() - - # Initialize an empty string to store the generated text - partial_text = "" - for new_text in streamer: - if not new_text: - continue - partial_text += new_text - conv.messages[-1][-1] = partial_text - history[-1][1] = partial_text - yield history - -.. code:: ipython3 - - if not Path("gradio_helper.py").exists(): - r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/llava-multimodal-chatbot/gradio_helper.py") - open("gradio_helper.py", "w").write(r.text) - - from gradio_helper import make_demo_llava - - demo = make_demo_llava(handle_user_message=handle_user_message, run_chatbot=run_chatbot, clear_history=clear_history) - - try: - demo.queue(max_size=2).launch(debug=False) - except Exception: - demo.queue(max_size=2).launch(share=True, debug=False) - # if you are launching remotely, specify server_name and server_port - # demo.launch(server_name='your server name', server_port='server port in int') - # Read more in the docs: https://gradio.app/docs/ - -.. 
code:: ipython3 - - # please uncomment and run this cell for stopping gradio interface - # demo.close() diff --git a/docs/notebooks/llava-multimodal-chatbot-with-output_files/llava-multimodal-chatbot-with-output_20_1.jpg b/docs/notebooks/llava-multimodal-chatbot-with-output_files/llava-multimodal-chatbot-with-output_20_1.jpg deleted file mode 100644 index 29fc338b516a09..00000000000000 --- a/docs/notebooks/llava-multimodal-chatbot-with-output_files/llava-multimodal-chatbot-with-output_20_1.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f825c10443339b42cb5e2415f48bb7bafb4e087fb29bce6d2feaf3c2f89788c8 -size 72374 diff --git a/docs/notebooks/llava-multimodal-chatbot-with-output_files/llava-multimodal-chatbot-with-output_20_1.png b/docs/notebooks/llava-multimodal-chatbot-with-output_files/llava-multimodal-chatbot-with-output_20_1.png deleted file mode 100644 index c1062ffb3d6d10..00000000000000 --- a/docs/notebooks/llava-multimodal-chatbot-with-output_files/llava-multimodal-chatbot-with-output_20_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dde262e54da6d8dad5062989d7863db7cd85ac0403b9015a76f5884472f67ceb -size 599941 diff --git a/docs/notebooks/llm-agent-functioncall-qwen-with-output.rst b/docs/notebooks/llm-agent-functioncall-qwen-with-output.rst index b903b7d5081d94..08ca24744bf216 100644 --- a/docs/notebooks/llm-agent-functioncall-qwen-with-output.rst +++ b/docs/notebooks/llm-agent-functioncall-qwen-with-output.rst @@ -397,7 +397,7 @@ For example, to register your own image generation tool: In this notebook, we will create 3 tools as examples: - **image_generation**: AI painting (image generation) service, input text - description, and return the image URL drawn based on text information. + description, and return the image URL drawn based on text information. - **get_current_weather**: Get the current weather in a given city name. - **wikipedia**: A wrapper around Wikipedia. Useful for when you need to answer general questions about people, places, companies, facts, diff --git a/docs/notebooks/llm-agent-react-with-output.rst b/docs/notebooks/llm-agent-react-with-output.rst new file mode 100644 index 00000000000000..791355276fd2fd --- /dev/null +++ b/docs/notebooks/llm-agent-react-with-output.rst @@ -0,0 +1,560 @@ +Create a native Agent with OpenVINO +=================================== + +LLMs are limited to the knowledge on which they have been trained and the +additional knowledge provided as context. As a result, if a useful piece +of information is missing from the provided knowledge, the model cannot “go +around” and try to find it in other sources. This is the reason why we +need to introduce the concept of Agents. + +The core idea of agents is to use a language model to choose a sequence +of actions to take. In agents, a language model is used as a reasoning +engine to determine which actions to take and in which order. Agents can +be seen as applications powered by LLMs and integrated with a set of +tools like search engines, databases, websites, and so on. Within an +agent, the LLM is the reasoning engine that, based on the user input, is +able to plan and execute a set of actions that are needed to fulfill the +request. + +.. figure:: https://github.com/openvinotoolkit/openvino_notebooks/assets/91237924/22fa5396-8381-400f-a78f-97e25d57d807 + :alt: agent + + agent + +This example will demonstrate how to create a native agent with +OpenVINO.
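Before diving into the implementation, the cell below gives a minimal, schematic sketch of the reasoning loop that such an agent runs: the LLM proposes an action, the agent executes the matching tool, and the observation is fed back into the prompt until a final answer is produced. It is illustrative only; ``ask_llm`` and ``tools`` are hypothetical placeholders for the text generation function and tool registry that this notebook builds step by step below.

.. code:: ipython3

    # Schematic ReAct-style agent loop (illustrative sketch, not the final implementation).
    # `ask_llm` is a placeholder for an LLM completion function and `tools` maps tool names
    # to Python callables; both stand in for the components constructed later in this notebook.


    def run_agent(question: str, ask_llm, tools: dict, max_steps: int = 5) -> str:
        transcript = f"Question: {question}\nThought:"
        for _ in range(max_steps):
            # The LLM continues the transcript with its reasoning and either an action or an answer.
            reply = ask_llm(transcript)
            transcript += reply
            if "Final Answer:" in reply:
                return reply.split("Final Answer:")[-1].strip()
            if "Action:" in reply and "Action Input:" in reply:
                action = reply.split("Action:")[-1].split("Action Input:")[0].strip()
                action_input = reply.split("Action Input:")[-1].split("Observation:")[0].strip()
                # Execute the selected tool and feed its result back as an observation.
                observation = tools[action](action_input)
                transcript += f"\nObservation: {observation}\nThought:"
        return transcript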
+ + +**Table of contents:** + + +- `Prerequisites <#prerequisites>`__ +- `Create LLM as agent <#create-llm-as-agent>`__ + + - `Download model <#select-model>`__ + - `Select inference device for + LLM <#select-inference-device-for-llm>`__ + - `Instantiate LLM using Optimum + Intel <#instantiate-llm-using-optimum-intel>`__ + - `Create text generation method <#create-text-generation-method>`__ + +- `Create prompt template <#create-prompt-template>`__ +- `Create parser <#create-parers>`__ +- `Create tools calling <#create-tool-calling>`__ +- `Run agent <#run-agent>`__ + +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a self-contained example that relies solely on its own code. + +We recommend running the notebook in a virtual environment. You only +need a Jupyter server to start. For details, please refer to +`Installation +Guide `__. + +Prerequisites +------------- + + + +.. code:: ipython3 + + import os + import requests + + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/pip_helper.py", + ) + open("pip_helper.py", "w").write(r.text) + + os.environ["GIT_CLONE_PROTECTION_ACTIVE"] = "false" + + from pip_helper import pip_install + + pip_install( + "-q", + "--extra-index-url", + "https://download.pytorch.org/whl/cpu", + "transformers>=4.43.1", + ) + pip_install("-q", "git+https://github.com/huggingface/optimum-intel.git", "git+https://github.com/openvinotoolkit/nncf.git", "datasets", "accelerate") + pip_install("--pre", "-Uq", "openvino>=2024.4.0", "--extra-index-url", "https://storage.openvinotoolkit.org/simple/wheels/nightly") + +Create LLM as agent +------------------- + + + +Download LLM +~~~~~~~~~~~~ + + + +To run LLM locally, we have to download the model in the first step. It +is possible to `export your +model `__ +to the OpenVINO IR format with the CLI, and load the model from local +folder. + +Large Language Models (LLMs) are a core component of agent. LlamaIndex +does not serve its own LLMs, but rather provides a standard interface +for interacting with many different LLMs. In this example, we can select +``Qwen2.5`` as LLM in agent pipeline. \* +**qwen2.5-3b-instruct/qwen2.5-7b-instruct/qwen2.5-14b-instruct** - +Qwen2.5 is the latest series of Qwen large language models. Comparing +with Qwen2, Qwen2.5 series brings significant improvements in coding, +mathematics and general knowledge skills. Additionally, it brings +long-context and multiple languages support including Chinese, English, +French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, +Vietnamese, Thai, Arabic, and more. For more details, please refer to +`model_card `__, +`blog `__, +`GitHub `__, and +`Documentation `__. + +.. code:: ipython3 + + import ipywidgets as widgets + + llm_model_ids = ["Qwen/Qwen2.5-3B-Instruct", "Qwen/Qwen2.5-7B-Instruct", "Qwen/qwen2.5-14b-instruct"] + + llm_model_id = widgets.Dropdown( + options=llm_model_ids, + value=llm_model_ids[0], + description="Model:", + disabled=False, + ) + + llm_model_id + + + + +.. parsed-literal:: + + Dropdown(description='Model:', options=('Qwen/Qwen2.5-3B-Instruct', 'Qwen/Qwen2.5-7B-Instruct', 'Qwen/qwen2.5-… + + + +.. 
code:: ipython3 + + from pathlib import Path + + llm_model_path = llm_model_id.value.split("/")[-1] + + if not Path(llm_model_path).exists(): + !optimum-cli export openvino --model {llm_model_id.value} --task text-generation-with-past --trust-remote-code --weight-format int4 --group-size 128 --ratio 1.0 --sym {llm_model_path} + +Select inference device for LLM +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + from notebook_utils import device_widget + + llm_device = device_widget("CPU", exclude=["NPU"]) + + llm_device + + +.. parsed-literal:: + + [ERROR] 20:00:52.380 [NPUBackends] Cannot find backend for inference. Make sure the device is available. + + + + +.. parsed-literal:: + + Dropdown(description='Device:', options=('CPU', 'GPU', 'AUTO'), value='CPU') + + + +Instantiate LLM using Optimum Intel +----------------------------------- + + + +Optimum Intel can be used to load optimized models from the `Hugging +Face Hub `__ and +create pipelines to run an inference with OpenVINO Runtime using Hugging +Face APIs. The Optimum Inference models are API compatible with Hugging +Face Transformers models. This means we just need to replace +``AutoModelForXxx`` class with the corresponding ``OVModelForXxx`` +class. + +Below is an example of the RedPajama model + +.. code:: diff + + -from transformers import AutoModelForCausalLM + +from optimum.intel.openvino import OVModelForCausalLM + from transformers import AutoTokenizer, pipeline + + model_id = "togethercomputer/RedPajama-INCITE-Chat-3B-v1" + -model = AutoModelForCausalLM.from_pretrained(model_id) + +model = OVModelForCausalLM.from_pretrained(model_id, export=True) + +Model class initialization starts with calling ``from_pretrained`` +method. When downloading and converting Transformers model, the +parameter ``export=True`` should be added (as we already converted model +before, we do not need to provide this parameter). We can save the +converted model for the next usage with the ``save_pretrained`` method. +Tokenizer class and pipelines API are compatible with Optimum models. + +You can find more details about OpenVINO LLM inference using HuggingFace +Optimum API in `LLM inference +guide `__. + +.. code:: ipython3 + + from optimum.intel.openvino import OVModelForCausalLM + from transformers import AutoTokenizer, AutoConfig, TextStreamer + from transformers.generation import ( + StoppingCriteriaList, + StoppingCriteria, + ) + import openvino.properties as props + import openvino.properties.hint as hints + import openvino.properties.streams as streams + + import json + import json5 + import torch + + tokenizer = AutoTokenizer.from_pretrained(llm_model_path, trust_remote_code=True) + + ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): "1", props.cache_dir(): ""} + + llm = OVModelForCausalLM.from_pretrained( + llm_model_path, + device=llm_device.value, + ov_config=ov_config, + config=AutoConfig.from_pretrained(llm_model_path, trust_remote_code=True), + trust_remote_code=True, + ) + + llm.generation_config.top_k = 1 + llm.generation_config.max_length = 2000 + +Create text generation method +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +In this example, we would like to stream the output text though +``TextStreamer``, and stop text generation before ``Observation`` +received from tool calling.. + +.. code:: ipython3 + + class StopSequenceCriteria(StoppingCriteria): + """ + This class can be used to stop generation whenever a sequence of tokens is encountered. 
+ + Args: + stop_sequences (`str` or `List[str]`): + The sequence (or list of sequences) on which to stop execution. + tokenizer: + The tokenizer used to decode the model outputs. + """ + + def __init__(self, stop_sequences, tokenizer): + if isinstance(stop_sequences, str): + stop_sequences = [stop_sequences] + self.stop_sequences = stop_sequences + self.tokenizer = tokenizer + + def __call__(self, input_ids, scores, **kwargs) -> bool: + decoded_output = self.tokenizer.decode(input_ids.tolist()[0]) + return any(decoded_output.endswith(stop_sequence) for stop_sequence in self.stop_sequences) + + + def text_completion(prompt: str, stop_words) -> str: + im_end = "<|im_end|>" + if im_end not in stop_words: + stop_words = stop_words + [im_end] + streamer = TextStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True) + + stopping_criteria = StoppingCriteriaList([StopSequenceCriteria(stop_words, tokenizer)]) + input_ids = torch.tensor([tokenizer.encode(prompt)]) + generate_kwargs = dict( + input_ids=input_ids, + streamer=streamer, + stopping_criteria=stopping_criteria, + ) + output = llm.generate(**generate_kwargs) + output = output.tolist()[0] + output = tokenizer.decode(output, errors="ignore") + assert output.startswith(prompt) + output = output[len(prompt) :].replace("<|endoftext|>", "").replace(im_end, "") + + for stop_str in stop_words: + idx = output.find(stop_str) + if idx != -1: + output = output[: idx + len(stop_str)] + return output + +Create prompt template +---------------------- + + + +A prompt for a language model is a set of instructions or input provided +by a user to guide the model’s response, helping it understand the +context and generate relevant and coherent language-based output, such +as answering questions, completing sentences, or engaging in a +conversation. + +Different agents have different prompting styles for reasoning. In this +example, we will use `ReAct agent `__ with +its typical prompt template. For a full list of built-in agents see +`agent +types `__. + +.. figure:: https://github.com/user-attachments/assets/c26432c2-3cf1-4942-ae03-fd8e8ebb4509 + :alt: react + + react + +A ReAct prompt consists of few-shot task-solving trajectories, with +human-written text reasoning traces and actions, as well as environment +observations in response to actions. ReAct prompting is intuitive and +flexible to design, and achieves state-of-the-art few-shot performances +across a variety of tasks, from question answering to online shopping! + +In an prompt template for agent, ``query`` is user’s query and other +parameter should be a sequence of messages that contains the +``descriptions`` and ``parameters`` of agent tool. + +.. code:: ipython3 + + TOOL_DESC = """{name_for_model}: Call this tool to interact with the {name_for_human} API. What is the {name_for_human} API useful for? {description_for_model} Parameters: {parameters}""" + + PROMPT_REACT = """Answer the following questions as best you can. You have access to the following APIs: + + {tools_text} + + Use the following format: + + Question: the input question you must answer + Thought: you should always think about what to do + Action: the action to take, should be one of [{tools_name_text}] + Action Input: the input to the action + Observation: the result of the action + ... (this Thought/Action/Action Input/Observation can be repeated zero or more times) + Thought: I now know the final answer + Final Answer: the final answer to the original input question + + Begin! 
+ + Question: {query}""" + +Meanwhile we have to create function for consolidate the tools +information and conversation history into the prompt template. + +.. code:: ipython3 + + def build_input_text(chat_history, list_of_tool_info) -> str: + tools_text = [] + for tool_info in list_of_tool_info: + tool = TOOL_DESC.format( + name_for_model=tool_info["name_for_model"], + name_for_human=tool_info["name_for_human"], + description_for_model=tool_info["description_for_model"], + parameters=json.dumps(tool_info["parameters"], ensure_ascii=False), + ) + if tool_info.get("args_format", "json") == "json": + tool += " Format the arguments as a JSON object." + elif tool_info["args_format"] == "code": + tool += " Enclose the code within triple backticks (`) at the beginning and end of the code." + else: + raise NotImplementedError + tools_text.append(tool) + tools_text = "\n\n".join(tools_text) + + tools_name_text = ", ".join([tool_info["name_for_model"] for tool_info in list_of_tool_info]) + + messages = [{"role": "system", "content": "You are a helpful assistant."}] + for i, (query, response) in enumerate(chat_history): + if list_of_tool_info: + if (len(chat_history) == 1) or (i == len(chat_history) - 2): + query = PROMPT_REACT.format( + tools_text=tools_text, + tools_name_text=tools_name_text, + query=query, + ) + if query: + messages.append({"role": "user", "content": query}) + if response: + messages.append({"role": "assistant", "content": response}) + + prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False, return_tensors="pt") + + return prompt + +Create parser +------------- + + + +A Parser is used to convert raw output of LLM to the input arguments of +tools. + +.. code:: ipython3 + + def parse_latest_tool_call(text): + tool_name, tool_args = "", "" + i = text.rfind("\nAction:") + j = text.rfind("\nAction Input:") + k = text.rfind("\nObservation:") + if 0 <= i < j: # If the text has `Action` and `Action input`, + if k < j: # but does not contain `Observation`, + # then it is likely that `Observation` is ommited by the LLM, + # because the output text may have discarded the stop word. + text = text.rstrip() + "\nObservation:" # Add it back. + k = text.rfind("\nObservation:") + tool_name = text[i + len("\nAction:") : j].strip() + tool_args = text[j + len("\nAction Input:") : k].strip() + text = text[:k] + return tool_name, tool_args, text + +Create tools calling +-------------------- + + + +In this examples, we will create 2 customized tools for +``image generation`` and ``weather qurey``. A detailed description of +these tools should be defined in json format, which will be used as part +of prompt. + +.. code:: ipython3 + + tools = [ + { + "name_for_human": "get weather", + "name_for_model": "get_weather", + "description_for_model": 'Get the current weather in a given city name."', + "parameters": [ + { + "name": "city_name", + "description": "City name", + "required": True, + "schema": {"type": "string"}, + } + ], + }, + { + "name_for_human": "image generation", + "name_for_model": "image_gen", + "description_for_model": "AI painting (image generation) service, input text description, and return the image URL drawn based on text information.", + "parameters": [ + { + "name": "prompt", + "description": "describe the image", + "required": True, + "schema": {"type": "string"}, + } + ], + }, + ] + +Then we should implement these tools with inputs and outputs, and +execute them according to the output of LLM. + +.. 
code:: ipython3 + + def call_tool(tool_name: str, tool_args: str) -> str: + if tool_name == "get_weather": + city_name = json5.loads(tool_args)["city_name"] + key_selection = { + "current_condition": [ + "temp_C", + "FeelsLikeC", + "humidity", + "weatherDesc", + "observation_time", + ], + } + resp = requests.get(f"https://wttr.in/{city_name}?format=j1") + resp.raise_for_status() + resp = resp.json() + ret = {k: {_v: resp[k][0][_v] for _v in v} for k, v in key_selection.items()} + return str(ret) + elif tool_name == "image_gen": + import urllib.parse + + tool_args = tool_args.replace("(", "").replace(")", "") + prompt = json5.loads(tool_args)["prompt"] + prompt = urllib.parse.quote(prompt) + return json.dumps( + {"image_url": f"https://image.pollinations.ai/prompt/{prompt}"}, + ensure_ascii=False, + ) + else: + raise NotImplementedError + + + def llm_with_tool(prompt: str, history, list_of_tool_info=()): + chat_history = [(x["user"], x["bot"]) for x in history] + [(prompt, "")] + + planning_prompt = build_input_text(chat_history, list_of_tool_info) + text = "" + while True: + output = text_completion(planning_prompt + text, stop_words=["Observation:", "Observation:\n"]) + action, action_input, output = parse_latest_tool_call(output) + if action: + observation = call_tool(action, action_input) + output += f"\nObservation: = {observation}\nThought:" + observation = f"{observation}\nThought:" + print(observation) + text += output + else: + text += output + break + + new_history = [] + new_history.extend(history) + new_history.append({"user": prompt, "bot": text}) + return text, new_history + +Run agent +--------- + + + +.. code:: ipython3 + + history = [] + query = "get the weather in London, and create a picture of Big Ben based on the weather information" + + response, history = llm_with_tool(prompt=query, history=history, list_of_tool_info=tools) + + +.. parsed-literal:: + + Thought: First, I need to use the get_weather API to get the current weather in London. + Action: get_weather + Action Input: {"city_name": "London"} + Observation: + {'current_condition': {'temp_C': '11', 'FeelsLikeC': '10', 'humidity': '94', 'weatherDesc': [{'value': 'Overcast'}], 'observation_time': '12:23 AM'}} + Thought: + Now that I have the weather information, I will use the image_gen API to generate an image of Big Ben based on the weather conditions. + Action: image_gen + Action Input: {"prompt": "Big Ben under overcast sky with temperature 11°C and humidity 94%"} + Observation: + {"image_url": "https://image.pollinations.ai/prompt/Big%20Ben%20under%20overcast%20sky%20with%20temperature%2011%C2%B0C%20and%20humidity%2094%25"} + Thought: + The image has been generated successfully. + Final Answer: The current weather in London is overcast with a temperature of 11°C and humidity of 94%. Based on this information, here is the image of Big Ben under an overcast sky: ![](https://image.pollinations.ai/prompt/Big%20Ben%20under%20overcast%20sky%20with%20temperature%2011%C2%B0C%20and%20humidity%2094%25) + diff --git a/docs/notebooks/llm-chatbot-generate-api-with-output.rst b/docs/notebooks/llm-chatbot-generate-api-with-output.rst index ac4c22ffc3b185..dab94c37d77a4c 100644 --- a/docs/notebooks/llm-chatbot-generate-api-with-output.rst +++ b/docs/notebooks/llm-chatbot-generate-api-with-output.rst @@ -97,15 +97,6 @@ Install required dependencies "transformers>=4.43.1" \ "onnx<=1.16.1; sys_platform=='win32'" "einops" "transformers_stream_generator" "tiktoken" "bitsandbytes" - -.. 
parsed-literal:: - - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - - .. code:: ipython3 import os @@ -199,8 +190,16 @@ several options for model weight compression:
+.. raw:: html + + + Click here to see available models options +.. raw:: html + + + - **tiny-llama-1b-chat** - This is the chat model finetuned on top of `TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T `__. The TinyLlama project aims to pretrain a 1.1B Llama model on 3 @@ -293,9 +292,10 @@ Click here to see available models options model can be found in `model card `__. >\ **Note**: run model with demo, you will need to accept license agreement. >You must - be a registered user in Hugging Face Hub. Please visit `HuggingFace - model card `__, carefully - read terms of usage and click accept button. You will need to use an + be a registered user in Hugging Face Hub. Please visit + `HuggingFace model + card `__, carefully read + terms of usage and click accept button. You will need to use an access token for the code below to run. For more information on access tokens, refer to `this section of the documentation `__. @@ -387,9 +387,10 @@ Click here to see available models options model can be found in `model card `__. >\ **Note**: run model with demo, you will need to accept license agreement. >You must - be a registered user in Hugging Face Hub. Please visit `HuggingFace - model card `__, carefully - read terms of usage and click accept button. You will need to use an + be a registered user in Hugging Face Hub. Please visit + `HuggingFace model + card `__, carefully read + terms of usage and click accept button. You will need to use an access token for the code below to run. For more information on access tokens, refer to `this section of the documentation `__. @@ -666,7 +667,7 @@ Click here to see available models options .. parsed-literal:: - Selected model qwen2.5-0.5b-instruct with INT4 compression + Selected model qwen2-0.5b-instruct with INT4 compression Convert model using Optimum-CLI tool @@ -674,8 +675,8 @@ Convert model using Optimum-CLI tool -`Optimum Intel `__ is -the interface between the +`Optimum Intel `__ +is the interface between the `Transformers `__ and `Diffusers `__ libraries and OpenVINO to accelerate end-to-end pipelines on Intel architectures. @@ -688,8 +689,16 @@ format.
+.. raw:: html + + + Click here to read more about Optimum CLI usage +.. raw:: html + + + The command below demonstrates the basic command for model export with ``optimum-cli`` @@ -722,8 +731,16 @@ with the CLI.
+.. raw:: html + + + Click here to read more about weights compression with Optimum CLI +.. raw:: html + + + Setting ``--weight-format`` to respectively fp16, int8 or int4. This type of optimization allows to reduce the memory footprint and inference latency. By default the quantization scheme for int8/int4 will be @@ -766,47 +783,7 @@ be additionally applied during model export with INT4 precision using .. parsed-literal:: - ⌛ qwen2.5-0.5b-instruct conversion to INT4 started. It may takes some time. - - - -**Export command:** - - - -``optimum-cli export openvino --model Qwen/Qwen2.5-0.5B-Instruct --task text-generation-with-past --weight-format int4 --group-size 128 --ratio 1.0 --sym qwen2.5/INT4_compressed_weights`` - - -.. parsed-literal:: - - 2024-10-08 02:53:02.359208: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 02:53:02.392956: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 02:53:02.929372: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - Framework not specified. Using pt to export the model. - Using framework PyTorch: 2.2.2+cpu - Overriding 1 configuration item(s) - - use_cache -> True - We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/exporters/openvino/model_patcher.py:496: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if sequence_length != 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/qwen2/modeling_qwen2.py:165: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if seq_len > self.max_seq_len_cached: - Set tokenizer padding side to left for `text-generation-with-past` task. - - -.. 
parsed-literal:: - - INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 28% (1 / 169) │ 0% (0 / 168) │ - ├───────────────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ int4_sym │ 72% (168 / 169) │ 100% (168 / 168) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ - Applying Weight Compression ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% • 0:00:14 • 0:00:00 - ✅ INT4 qwen2.5-0.5b-instruct model converted and can be found in qwen2.5/INT4_compressed_weights + ✅ INT4 qwen2-0.5b-instruct model already converted and can be found in qwen2/INT4_compressed_weights Let’s compare model size for different compression types @@ -820,7 +797,7 @@ Let’s compare model size for different compression types .. parsed-literal:: - Size of model with INT4 compressed weights is 322.44 MB + Size of model with INT4 compressed weights is 358.86 MB Select device for inference @@ -891,10 +868,10 @@ of the available generation parameters more deeply later. .. parsed-literal:: - Loading model from qwen2.5/INT4_compressed_weights + Loading model from qwen2/INT4_compressed_weights Input text: The Sun is yellow bacause - of its coloration. The Sun is blue because + it is made of hydrogen and oxygen atoms. The Run Chatbot @@ -909,8 +886,16 @@ Now, when model created, we can setup Chatbot interface using
+.. raw:: html + + + Click here to see how pipeline works +.. raw:: html + + + The diagram below illustrates how the chatbot pipeline works .. figure:: https://github.com/user-attachments/assets/9c9b56e1-01a6-48d8-aa46-222a88e25066 @@ -963,8 +948,16 @@ Advanced generation options
+.. raw:: html + + + Click here to see detailed description of advanced options +.. raw:: html + + + | There are several parameters that can control text generation quality, \* ``Temperature`` is a parameter used to control the level of creativity in AI-generated text. By adjusting the ``temperature``, you @@ -1036,27 +1029,13 @@ Click here to see detailed description of advanced options demo = make_demo(pipe, model_configuration, model_id, lang.value) try: - demo.launch(debug=False) + demo.launch(debug=True) except Exception: - demo.launch(debug=False, share=True) + demo.launch(debug=True, share=True) # If you are launching remotely, specify server_name and server_port # EXAMPLE: `demo.launch(server_name='your server name', server_port='server port in int')` # To learn more please refer to the Gradio docs: https://gradio.app/docs/ - -.. parsed-literal:: - - Running on local URL: http://127.0.0.1:7860 - - To create a public link, set `share=True` in `launch()`. - - - - - - - - .. code:: ipython3 # please uncomment and run this cell for stopping gradio interface diff --git a/docs/notebooks/llm-chatbot-with-output.rst b/docs/notebooks/llm-chatbot-with-output.rst index cbd76c0544ba82..0d214f5cccc0fc 100644 --- a/docs/notebooks/llm-chatbot-with-output.rst +++ b/docs/notebooks/llm-chatbot-with-output.rst @@ -83,9 +83,9 @@ Install required dependencies .. code:: ipython3 import os - + os.environ["GIT_CLONE_PROTECTION_ACTIVE"] = "false" - + %pip install -Uq pip %pip uninstall -q -y optimum optimum-intel %pip install --pre -Uq "openvino>=2024.2.0" openvino-tokenizers[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly @@ -104,12 +104,12 @@ Install required dependencies from pathlib import Path import requests import shutil - + # fetch model configuration - + config_shared_path = Path("../../utils/llm_config.py") config_dst_path = Path("llm_config.py") - + if not config_dst_path.exists(): if config_shared_path.exists(): try: @@ -184,7 +184,7 @@ Click here to see available models options .. code:: python - ## login to huggingfacehub to get access to pretrained model + ## login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -217,7 +217,7 @@ Click here to see available models options .. code:: python - # login to huggingfacehub to get access to pretrained model + # login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -278,7 +278,7 @@ Click here to see available models options .. code:: python - ## login to huggingfacehub to get access to pretrained model + ## login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -311,7 +311,7 @@ Click here to see available models options .. code:: python - # login to huggingfacehub to get access to pretrained model + # login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -345,7 +345,7 @@ Click here to see available models options .. code:: python - ## login to huggingfacehub to get access to pretrained model + ## login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -379,7 +379,7 @@ Click here to see available models options .. 
code:: python - ## login to huggingfacehub to get access to pretrained model + ## login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -410,7 +410,7 @@ Click here to see available models options .. code:: python - ## login to huggingfacehub to get access to pretrained model + ## login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -545,14 +545,14 @@ Click here to see available models options .. code:: ipython3 model_languages = list(SUPPORTED_LLM_MODELS) - + model_language = widgets.Dropdown( options=model_languages, value=model_languages[0], description="Model Language:", disabled=False, ) - + model_language @@ -567,14 +567,14 @@ Click here to see available models options .. code:: ipython3 model_ids = list(SUPPORTED_LLM_MODELS[model_language.value]) - + model_id = widgets.Dropdown( options=model_ids, value=model_ids[0], description="Model:", disabled=False, ) - + model_id @@ -603,7 +603,7 @@ Convert model using Optimum-CLI tool `Optimum Intel `__ is -the interface between the +the interface between the `Transformers `__ and `Diffusers `__ libraries and OpenVINO to accelerate end-to-end pipelines on Intel architectures. @@ -655,12 +655,13 @@ to make it `symmetric `__ you can add ``--sym``. -For INT4 quantization you can also specify the following arguments : - -The ``--group-size`` parameter will define the group size to use for -quantization, -1 it will results in per-column quantization. - The -``--ratio`` parameter controls the ratio between 4-bit and 8-bit -quantization. If set to 0.9, it means that 90% of the layers will be -quantized to int4 while 10% will be quantized to int8. +For INT4 quantization you can also specify the following arguments : + +- The ``--group-size`` parameter will define the group size to use for + quantization, -1 it will results in per-column quantization. +- The ``--ratio`` parameter controls the ratio between 4-bit and 8-bit + quantization. If set to 0.9, it means that 90% of the layers will be + quantized to int4 while 10% will be quantized to int8. Smaller group_size and ratio values usually improve accuracy at the sacrifice of the model size and inference latency. @@ -671,7 +672,7 @@ sacrifice of the model size and inference latency. .. code:: ipython3 from IPython.display import Markdown, display - + prepare_int4_model = widgets.Checkbox( value=True, description="Prepare INT4 model", @@ -687,7 +688,7 @@ sacrifice of the model size and inference latency. description="Prepare FP16 model", disabled=False, ) - + display(prepare_int4_model) display(prepare_int8_model) display(prepare_fp16_model) @@ -756,14 +757,14 @@ We can now save floating point and compressed model variants .. code:: ipython3 from pathlib import Path - + pt_model_id = model_configuration["model_id"] pt_model_name = model_id.value.split("-")[0] fp16_model_dir = Path(model_id.value) / "FP16" int8_model_dir = Path(model_id.value) / "INT8_compressed_weights" int4_model_dir = Path(model_id.value) / "INT4_compressed_weights" - - + + def convert_to_fp16(): if (fp16_model_dir / "openvino_model.xml").exists(): return @@ -775,8 +776,8 @@ We can now save floating point and compressed model variants display(Markdown("**Export command:**")) display(Markdown(f"`{export_command}`")) ! 
$export_command - - + + def convert_to_int8(): if (int8_model_dir / "openvino_model.xml").exists(): return @@ -789,8 +790,8 @@ We can now save floating point and compressed model variants display(Markdown("**Export command:**")) display(Markdown(f"`{export_command}`")) ! $export_command - - + + def convert_to_int4(): compression_configs = { "zephyr-7b-beta": { @@ -865,7 +866,7 @@ We can now save floating point and compressed model variants "ratio": 0.8, }, } - + model_compression_params = compression_configs.get(model_id.value, compression_configs["default"]) if (int4_model_dir / "openvino_model.xml").exists(): return @@ -883,8 +884,8 @@ We can now save floating point and compressed model variants display(Markdown("**Export command:**")) display(Markdown(f"`{export_command}`")) ! $export_command - - + + if prepare_fp16_model.value: convert_to_fp16() if prepare_int8_model.value: @@ -899,7 +900,7 @@ Let’s compare model size for different compression types fp16_weights = fp16_model_dir / "openvino_model.bin" int8_weights = int8_model_dir / "openvino_model.bin" int4_weights = int4_model_dir / "openvino_model.bin" - + if fp16_weights.exists(): print(f"Size of FP16 model is {fp16_weights.stat().st_size / 1024 / 1024:.2f} MB") for precision, compressed_weights in zip([8, 4], [int8_weights, int4_weights]): @@ -925,16 +926,16 @@ Select device for inference and model variant .. code:: ipython3 import requests - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) open("notebook_utils.py", "w").write(r.text) - + from notebook_utils import device_widget - + device = device_widget("CPU", exclude=["NPU"]) - + device @@ -958,14 +959,14 @@ variant of model weights and inference device available_models.append("INT8") if fp16_model_dir.exists(): available_models.append("FP16") - + model_to_run = widgets.Dropdown( options=available_models, value=available_models[0], description="Model to run:", disabled=False, ) - + model_to_run @@ -1017,13 +1018,13 @@ guide `__ from transformers import AutoConfig, AutoTokenizer from optimum.intel.openvino import OVModelForCausalLM - + import openvino as ov import openvino.properties as props import openvino.properties.hint as hints import openvino.properties.streams as streams - - + + if model_to_run.value == "INT4": model_dir = int4_model_dir elif model_to_run.value == "INT8": @@ -1031,22 +1032,22 @@ guide `__ else: model_dir = fp16_model_dir print(f"Loading model from {model_dir}") - + ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): "1", props.cache_dir(): ""} - + if "GPU" in device.value and "qwen2-7b-instruct" in model_id.value: ov_config["GPU_ENABLE_SDPA_OPTIMIZATION"] = "NO" - + # On a GPU device a model is executed in FP16 precision. For red-pajama-3b-chat model there known accuracy # issues caused by this, which we avoid by setting precision hint to "f32". 
core = ov.Core() - + if model_id.value == "red-pajama-3b-chat" and "GPU" in core.available_devices and device.value in ["GPU", "AUTO"]: ov_config["INFERENCE_PRECISION_HINT"] = "f32" - + model_name = model_configuration["model_id"] tok = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True) - + ov_model = OVModelForCausalLM.from_pretrained( model_dir, device=device.value, @@ -1120,14 +1121,14 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html :: - playing: 0.5 - sleeping: 0.25 - eating: 0.15 - driving: 0.05 - flying: 0.05 + playing: 0.5 + sleeping: 0.25 + eating: 0.15 + driving: 0.05 + flying: 0.05 - - **Low temperature** (e.g., 0.2): The AI model becomes more focused and deterministic, choosing tokens with the highest probability, such as "playing." - - **Medium temperature** (e.g., 1.0): The AI model maintains a balance between creativity and focus, selecting tokens based on their probabilities without significant bias, such as "playing," "sleeping," or "eating." + - **Low temperature** (e.g., 0.2): The AI model becomes more focused and deterministic, choosing tokens with the highest probability, such as "playing." + - **Medium temperature** (e.g., 1.0): The AI model maintains a balance between creativity and focus, selecting tokens based on their probabilities without significant bias, such as "playing," "sleeping," or "eating." - **High temperature** (e.g., 2.0): The AI model becomes more adventurous, increasing the chances of selecting less likely tokens, such as "driving" and "flying." - ``Top-p``, also known as nucleus sampling, is a parameter used to @@ -1165,7 +1166,7 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html import torch from threading import Event, Thread - + from typing import List, Tuple from transformers import ( AutoTokenizer, @@ -1173,8 +1174,8 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html StoppingCriteriaList, TextIteratorStreamer, ) - - + + model_name = model_configuration["model_id"] start_message = model_configuration["start_message"] history_template = model_configuration.get("history_template") @@ -1182,46 +1183,46 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html current_message_template = model_configuration.get("current_message_template") stop_tokens = model_configuration.get("stop_tokens") tokenizer_kwargs = model_configuration.get("tokenizer_kwargs", {}) - + max_new_tokens = 256 - - + + class StopOnTokens(StoppingCriteria): def __init__(self, token_ids): self.token_ids = token_ids - + def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool: for stop_id in self.token_ids: if input_ids[0][-1] == stop_id: return True return False - - + + if stop_tokens is not None: if isinstance(stop_tokens[0], str): stop_tokens = tok.convert_tokens_to_ids(stop_tokens) - + stop_tokens = [StopOnTokens(stop_tokens)] - - + + def default_partial_text_processor(partial_text: str, new_text: str): """ helper for updating partially generated answer, used by default - + Params: partial_text: text buffer for storing previosly generated text new_text: text update for the current step Returns: updated text string - + """ partial_text += new_text return partial_text - - + + text_processor = model_configuration.get("partial_text_processor", default_partial_text_processor) - - + + def convert_history_to_token(history: List[Tuple[str, str]]): """ function for conversion history stored as list pairs of user and assistant 
messages to tokens according to model expected conversation template @@ -1255,7 +1256,7 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html messages.append({"role": "user", "content": user_msg}) if model_msg: messages.append({"role": "assistant", "content": model_msg}) - + input_token = tok.apply_chat_template(messages, add_generation_prompt=True, tokenize=True, return_tensors="pt") else: text = start_message + "".join( @@ -1276,12 +1277,12 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html ) input_token = tok(text, return_tensors="pt", **tokenizer_kwargs).input_ids return input_token - - + + def bot(history, temperature, top_p, top_k, repetition_penalty, conversation_id): """ callback function for running chatbot on submit button click - + Params: history: conversation history temperature: parameter for control the level of creativity in AI-generated text. @@ -1290,9 +1291,9 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html top_k: parameter for control the range of tokens considered by the AI model based on their cumulative probability, selecting number of tokens with highest probability. repetition_penalty: parameter for penalizing tokens based on how frequently they occur in the text. conversation_id: unique conversation identifier. - + """ - + # Construct the input message string for the model by concatenating the current system message and conversation history # Tokenize the messages string input_ids = convert_history_to_token(history) @@ -1312,9 +1313,9 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html ) if stop_tokens is not None: generate_kwargs["stopping_criteria"] = StoppingCriteriaList(stop_tokens) - + stream_complete = Event() - + def generate_and_signal_complete(): """ genration function for single thread @@ -1322,18 +1323,18 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html global start_time ov_model.generate(**generate_kwargs) stream_complete.set() - + t1 = Thread(target=generate_and_signal_complete) t1.start() - + # Initialize an empty string to store the generated text partial_text = "" for new_text in streamer: partial_text = text_processor(partial_text, new_text) history[-1][1] = partial_text yield history - - + + def request_cancel(): ov_model.request.cancel() @@ -1342,11 +1343,11 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html if not Path("gradio_helper.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/llm-chatbot/gradio_helper.py") open("gradio_helper.py", "w").write(r.text) - + from gradio_helper import make_demo - + demo = make_demo(run_fn=bot, stop_fn=request_cancel, title=f"OpenVINO {model_id.value} Chatbot", language=model_language.value) - + try: demo.launch() except Exception: diff --git a/docs/notebooks/llm-rag-langchain-with-output.rst b/docs/notebooks/llm-rag-langchain-with-output.rst index 935c4c5ef1f205..1dec9cb2fb6659 100644 --- a/docs/notebooks/llm-rag-langchain-with-output.rst +++ b/docs/notebooks/llm-rag-langchain-with-output.rst @@ -127,7 +127,8 @@ Install required dependencies "onnx<1.16.2", "einops", "transformers_stream_generator", - "tiktoken" "transformers>=4.43.1", + "tiktoken", + "transformers>=4.43.1", "faiss-cpu", "sentence_transformers", "langchain>=0.2.0", diff --git a/docs/notebooks/magika-content-type-recognition-with-output.rst 
b/docs/notebooks/magika-content-type-recognition-with-output.rst index 2fbe7e63a8b21b..3ef21583fa5807 100644 --- a/docs/notebooks/magika-content-type-recognition-with-output.rst +++ b/docs/notebooks/magika-content-type-recognition-with-output.rst @@ -41,7 +41,6 @@ post `__ diff --git a/docs/notebooks/meter-reader-with-output.rst b/docs/notebooks/meter-reader-with-output.rst index fbb69d83fe239a..7f539abf025ebb 100644 --- a/docs/notebooks/meter-reader-with-output.rst +++ b/docs/notebooks/meter-reader-with-output.rst @@ -54,21 +54,13 @@ Guide =2023.1.0" opencv-python tqdm - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "openvino>=2023.1.0" opencv-python tqdm "matplotlib>=3.4" .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. Import @@ -645,7 +637,7 @@ bounds of input batch size. .. parsed-literal:: - + diff --git a/docs/notebooks/minicpm-v-multimodal-chatbot-with-output.rst b/docs/notebooks/minicpm-v-multimodal-chatbot-with-output.rst index 2c5cdf1aecf169..f6a22b6f160760 100644 --- a/docs/notebooks/minicpm-v-multimodal-chatbot-with-output.rst +++ b/docs/notebooks/minicpm-v-multimodal-chatbot-with-output.rst @@ -205,10 +205,10 @@ Let’s convert each model part. .. parsed-literal:: - 2024-10-08 02:54:38.009287: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 02:54:38.043246: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-23 01:47:25.606377: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-23 01:47:25.640217: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 02:54:38.562064: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-23 01:47:26.161344: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. code:: ipython3 @@ -233,49 +233,49 @@ Let’s convert each model part. .. parsed-literal:: - config.json: 0%| | 0.00/1.36k [00:00 self.max_seq_len_cached: @@ -548,11 +548,11 @@ documentation `__ - `Select model <#select-model>`__ @@ -64,9 +62,9 @@ Prerequisites .. code:: ipython3 from pathlib import Path - + repo_dir = Path("./ml-mobileclip") - + if not repo_dir.exists(): !git clone https://github.com/apple/ml-mobileclip.git @@ -74,27 +72,26 @@ Prerequisites .. parsed-literal:: Cloning into 'ml-mobileclip'... - remote: Enumerating objects: 84, done. - remote: Counting objects: 100% (84/84), done. - remote: Compressing objects: 100% (61/61), done. 
- remote: Total 84 (delta 29), reused 75 (delta 22), pack-reused 0 (from 0) - Unpacking objects: 100% (84/84), 467.39 KiB | 2.58 MiB/s, done. + remote: Enumerating objects: 95, done. + remote: Counting objects: 100% (95/95), done. + remote: Compressing objects: 100% (66/66), done. + remote: Total 95 (delta 38), reused 85 (delta 28), pack-reused 0 (from 0) + Unpacking objects: 100% (95/95), 469.11 KiB | 3.91 MiB/s, done. .. code:: ipython3 %pip install -q "./ml-mobileclip" --no-deps - + %pip install -q "clip-benchmark>=1.4.0" "datasets>=2.8.0" "open-clip-torch>=2.20.0" "timm>=0.9.5" "torch>=1.13.1" "torchvision>=0.14.1" --extra-index-url https://download.pytorch.org/whl/cpu - - %pip install -q "openvino>=2024.0.0" "gradio>=4.19" "matplotlib" "Pillow" "altair" "pandas" "opencv-python" "tqdm" + + %pip install -q "openvino>=2024.0.0" "gradio>=4.19" "matplotlib" "Pillow" "altair" "pandas" "opencv-python" "tqdm" "matplotlib>=3.4" .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.2.2+cpu which is incompatible. mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. @@ -144,9 +141,9 @@ comparison purposes, you can select different models among: .. code:: ipython3 import ipywidgets as widgets - + model_dir = Path("checkpoints") - + supported_models = { "MobileCLIP": { "mobileclip_s0": { @@ -215,8 +212,8 @@ comparison purposes, you can select different models among: }, }, } - - + + model_type = widgets.Dropdown(options=supported_models.keys(), default="MobileCLIP", description="Model type:") model_type @@ -232,13 +229,13 @@ comparison purposes, you can select different models among: .. code:: ipython3 available_models = supported_models[model_type.value] - + model_checkpoint = widgets.Dropdown( options=available_models.keys(), default=list(available_models), description="Model:", ) - + model_checkpoint @@ -253,15 +250,15 @@ comparison purposes, you can select different models among: .. 
code:: ipython3 import requests - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) - + open("notebook_utils.py", "w").write(r.text) - + from notebook_utils import download_file, device_widget - + model_config = available_models[model_checkpoint.value] Run model inference @@ -296,8 +293,8 @@ Prepare image gallery import matplotlib.pyplot as plt import numpy as np from PIL import Image - - + + def visualize_result(images: List, query: str = "", selected: List[int] = None): """ Utility function for visualization classification results @@ -325,8 +322,8 @@ Prepare image gallery mask = np.ones_like(np.array(images[idx])) a.imshow(mask, "jet", interpolation="none", alpha=0.75) return fig - - + + images_urls = [ "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/282ce53e-912d-41aa-ab48-2a001c022d74", "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/9bb40168-82b5-4b11-ada6-d8df104c736c", @@ -336,17 +333,17 @@ Prepare image gallery image_names = ["red_panda.png", "cat.png", "raccoon.png", "dog.png"] sample_path = Path("data") sample_path.mkdir(parents=True, exist_ok=True) - + images = [] for image_name, image_url in zip(image_names, images_urls): image_path = sample_path / image_name if not image_path.exists(): download_file(image_url, filename=image_name, directory=sample_path) images.append(Image.open(image_path).convert("RGB").resize((640, 420))) - + input_labels = ["cat"] text_descriptions = [f"This is a photo of a {label}" for label in input_labels] - + visualize_result(images, "image gallery"); @@ -393,7 +390,7 @@ preprocessing utilities from PIL import Image import mobileclip import open_clip - + # instantiate model model_name = model_config["model_name"] pretrained = model_config["pretrained"] @@ -408,6 +405,12 @@ preprocessing utilities tokenizer = open_clip.get_tokenizer(model_name) +.. parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers + warnings.warn(f"Importing from {__name__} is deprecated, please import via timm.layers", FutureWarning) + + .. parsed-literal:: @@ -423,8 +426,8 @@ Perform search image_tensor = torch.stack([preprocess(image) for image in images]) text = tokenizer(text_descriptions) - - + + with torch.no_grad(): # calculate image embeddings image_encoding_start = time.perf_counter() @@ -436,22 +439,22 @@ Perform search text_features = model.encode_text(text) text_encoding_end = time.perf_counter() print(f"Text encoding took {text_encoding_end - text_encoding_start:.3} ms") - + # normalize embeddings image_features /= image_features.norm(dim=-1, keepdim=True) text_features /= text_features.norm(dim=-1, keepdim=True) - + # calcualte similarity score image_probs = (100.0 * text_features @ image_features.T).softmax(dim=-1) selected_image = [torch.argmax(image_probs).item()] - + visualize_result(images, input_labels[0], selected_image); .. parsed-literal:: - Image encoding took 0.108 ms - Text encoding took 0.0118 ms + Image encoding took 0.136 ms + Text encoding took 0.0123 ms @@ -478,8 +481,8 @@ be used separately. Let’s convert each part to OpenVINO. 
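    # Aside - a usage sketch, not a cell from the notebook itself: once both parts are
    # converted below, a text query only needs to be encoded once and cached, while the
    # image encoder alone runs per image or video frame. The file names and the
    # `tokenizer` / `preprocess` / `images` objects mirror the surrounding cells and are
    # assumptions for illustration (shown here for the `mobileclip_s0` checkpoint).
    import numpy as np
    import openvino as ov

    core = ov.Core()
    image_enc = core.compile_model("ov_models/mobileclip_s0_im_encoder.xml", "AUTO")
    text_enc = core.compile_model("ov_models/mobileclip_s0_text_encoder.xml", "AUTO")

    # encode the text query once and normalize it
    text_features = text_enc(tokenizer(["a photo of a cat"]).numpy())[0]
    text_features /= np.linalg.norm(text_features, axis=-1, keepdims=True)

    # per image (or video frame), only the image encoder is executed
    image_features = image_enc(preprocess(images[0]).unsqueeze(0).numpy())[0]
    image_features /= np.linalg.norm(image_features, axis=-1, keepdims=True)
    similarity = 100.0 * text_features @ image_features.T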
import types import torch.nn.functional as F - - + + def se_block_forward(self, inputs): """Apply forward pass.""" b, c, h, w = inputs.size() @@ -495,12 +498,12 @@ be used separately. Let’s convert each part to OpenVINO. import openvino as ov import gc - + ov_models_dir = Path("ov_models") ov_models_dir.mkdir(exist_ok=True) - + image_encoder_path = ov_models_dir / f"{model_checkpoint.value}_im_encoder.xml" - + if not image_encoder_path.exists(): if "mobileclip_s" in model_name: model.image_encoder.model.conv_exp.se.forward = types.MethodType(se_block_forward, model.image_encoder.model.conv_exp.se) @@ -513,23 +516,23 @@ be used separately. Let’s convert each part to OpenVINO. ov.save_model(ov_image_encoder, image_encoder_path) del ov_image_encoder gc.collect() - + text_encoder_path = ov_models_dir / f"{model_checkpoint.value}_text_encoder.xml" - + if not text_encoder_path.exists(): model.forward = model.encode_text ov_text_encoder = ov.convert_model(model, example_input=text, input=[-1, text.shape[1]]) ov.save_model(ov_text_encoder, text_encoder_path) del ov_text_encoder gc.collect() - + del model gc.collect(); .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/mobileclip/modules/common/transformer.py:125: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/mobileclip/modules/common/transformer.py:125: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if seq_len != self.num_embeddings: @@ -546,9 +549,9 @@ Select device for image encoder .. code:: ipython3 core = ov.Core() - + device = device_widget() - + device @@ -605,17 +608,17 @@ Perform search print(f"Text encoding took {text_encoding_end - text_encoding_start:.3} ms") image_features /= image_features.norm(dim=-1, keepdim=True) text_features /= text_features.norm(dim=-1, keepdim=True) - + image_probs = (100.0 * text_features @ image_features.T).softmax(dim=-1) selected_image = [torch.argmax(image_probs).item()] - + visualize_result(images, input_labels[0], selected_image); .. parsed-literal:: - Image encoding took 0.0271 ms - Text encoding took 0.00495 ms + Image encoding took 0.0297 ms + Text encoding took 0.00513 ms @@ -651,14 +654,14 @@ models can require different optimal threshold for search. ) from open_clip.transform import image_transform from typing import Optional - - + + current_device = device.value current_model = image_encoder_path.name.split("_im_encoder")[0] - + available_converted_models = [model_file.name.split("_im_encoder")[0] for model_file in ov_models_dir.glob("*_im_encoder.xml")] available_devices = list(core.available_devices) + ["AUTO"] - + download_file( "https://storage.openvinotoolkit.org/data/test_data/videos/car-detection.mp4", directory=sample_path, @@ -668,8 +671,8 @@ models can require different optimal threshold for search. 
directory=sample_path, filename="coco.mp4", ) - - + + def get_preprocess_and_tokenizer(model_name): if "mobileclip" in model_name: resolution = supported_models["MobileCLIP"][model_name]["image_size"] @@ -690,10 +693,10 @@ models can require different optimal threshold for search. resize_size = model_configs[model_name]["image_size"] preprocess = image_transform((resize_size, resize_size), is_train=False, resize_mode="longest") tokenizer = open_clip.get_tokenizer(model_configs[model_name]["model_name"]) - + return preprocess, tokenizer - - + + def run( path: str, text_search: str, @@ -712,7 +715,7 @@ models can require different optimal threshold for search. global tokenizer global ov_compiled_image_encoder global ov_compiled_text_encoder - + if current_model != model_name or device != current_device: ov_compiled_image_encoder = core.compile_model(ov_models_dir / f"{model_name}_im_encoder.xml", device) ov_compiled_text_encoder = core.compile_model(ov_models_dir / f"{model_name}_text_encoder.xml", device) @@ -722,7 +725,7 @@ models can require different optimal threshold for search. # Load video dataset = LoadVideo(path, transforms=preprocess, vid_stride=stride) dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=0) - + # Get image query features if image_search: image = preprocess(image_search).unsqueeze(0) @@ -742,11 +745,11 @@ models can require different optimal threshold for search. for image, orig, frame, timestamp in dataloader: with torch.no_grad(): image_features = torch.from_numpy(ov_compiled_image_encoder(image)[0]) - + image_features /= image_features.norm(dim=-1, keepdim=True) probs = query_features.cpu().numpy() @ image_features.cpu().numpy().T probs = probs[0] - + # Save frame similarity values df = pd.DataFrame( { @@ -756,15 +759,15 @@ models can require different optimal threshold for search. } ) res = pd.concat([res, df]) - + # Check if frame is over threshold for i, p in enumerate(probs): if p > thresh: matches.append(to_pil_image(orig[i])) matches_probs.append(p) - + print(f"Frames: {frame.tolist()} - Probs: {probs}") - + # Create plot of similarity values lines = ( alt.Chart(res) @@ -775,16 +778,16 @@ models can require different optimal threshold for search. ) ).properties(width=600) rule = alt.Chart().mark_rule(strokeDash=[6, 3], size=2).encode(y=alt.datum(thresh)) - + selected_frames = np.argsort(-1 * np.array(matches_probs))[:20] matched_sorted_frames = [matches[idx] for idx in selected_frames] - + return ( lines + rule, matched_sorted_frames, ) # Only return up to 20 images to not crash the UI - - + + class LoadVideo(Dataset): def __init__(self, path, transforms, vid_stride=1): self.transforms = transforms @@ -792,27 +795,27 @@ models can require different optimal threshold for search. self.cur_frame = 0 self.cap = cv2.VideoCapture(path) self.total_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride) - + def __getitem__(self, _): # Read video # Skip over frames for _ in range(self.vid_stride): self.cap.grab() self.cur_frame += 1 - + # Read frame _, img = self.cap.retrieve() timestamp = self.cap.get(cv2.CAP_PROP_POS_MSEC) - + # Convert to PIL img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) img = Image.fromarray(np.uint8(img)) - + # Apply transforms img_t = self.transforms(img) - + return img_t, to_tensor(img), self.cur_frame, timestamp - + def __len__(self): return self.total_frames @@ -834,15 +837,15 @@ models can require different optimal threshold for search. 
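    # Aside - a sketch, not functionality shipped with the notebook: the note above that
    # different models can require a different optimal threshold can be handled
    # empirically by scoring a few frames against an unrelated query and placing the
    # threshold just above that "negative" score distribution. The helper name
    # `calibrate_threshold` is hypothetical.
    import numpy as np

    def calibrate_threshold(negative_scores, percentile=99, margin=0.01):
        """Return a cut-off slightly above the given percentile of negative scores."""
        return float(np.percentile(np.asarray(negative_scores), percentile)) + margin

    # example: thresh = calibrate_threshold(probs_for_unrelated_query)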
if not Path("gradio_helper.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/mobileclip-video-search/gradio_helper.py") open("gradio_helper.py", "w").write(r.text) - + from gradio_helper import make_demo, Option - + demo = make_demo( run=run, model_option=Option(choices=available_converted_models, value=model_checkpoint.value), device_option=Option(choices=available_devices, value=device.value), ) - + try: demo.launch(debug=False) except Exception: @@ -855,7 +858,7 @@ models can require different optimal threshold for search. .. parsed-literal:: Running on local URL: http://127.0.0.1:7860 - + To create a public link, set `share=True` in `launch()`. diff --git a/docs/notebooks/mobilevlm-language-assistant-with-output.rst b/docs/notebooks/mobilevlm-language-assistant-with-output.rst index 1ba06287ff485c..02efe16d9c0f4a 100644 --- a/docs/notebooks/mobilevlm-language-assistant-with-output.rst +++ b/docs/notebooks/mobilevlm-language-assistant-with-output.rst @@ -67,9 +67,7 @@ Install requirements Note: you may need to restart the kernel to use updated packages. ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.2.2+cpu which is incompatible. mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. - optimum-intel 1.20.0.dev0+542347b requires transformers<4.46,>=4.36, but you have transformers 4.33.3 which is incompatible. Note: you may need to restart the kernel to use updated packages. @@ -121,13 +119,13 @@ Import required packages .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( - 2024-10-08 03:04:16.549795: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 03:04:16.584461: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-23 01:57:03.532418: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-23 01:57:03.567584: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. 
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 03:04:17.090418: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + 2024-10-23 01:57:04.078609: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( @@ -344,15 +342,15 @@ compression instead of INT8 weight compression. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:595: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:595: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if input_shape[-1] > 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:119: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:119: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if seq_len > self.max_seq_len_cached: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:348: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:348: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:355: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:355: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attention_mask.size() != (bsz, 1, q_len, kv_seq_len): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:365: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:365: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim): @@ -372,13 +370,13 @@ compression instead of INT8 weight compression. .. 
parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 24% (43 / 169) │ 20% (42 / 168) │ - ├───────────────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ int4_asym │ 76% (126 / 169) │ 80% (126 / 168) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 24% (43 / 169) │ 20% (42 / 168) │ + ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ + │ 4 │ 76% (126 / 169) │ 80% (126 / 168) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -418,7 +416,7 @@ compression instead of INT8 weight compression. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:165: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:165: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) if a.grad is not None: @@ -438,13 +436,13 @@ compression instead of INT8 weight compression. .. 
parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 28% (44 / 170) │ 20% (42 / 168) │ - ├───────────────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ int4_asym │ 72% (126 / 170) │ 80% (126 / 168) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 28% (44 / 170) │ 20% (42 / 168) │ + ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ + │ 4 │ 72% (126 / 170) │ 80% (126 / 168) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ diff --git a/docs/notebooks/model-server-with-output.rst b/docs/notebooks/model-server-with-output.rst index dc6c9e966cf462..d5a9347a46e807 100644 --- a/docs/notebooks/model-server-with-output.rst +++ b/docs/notebooks/model-server-with-output.rst @@ -181,14 +181,7 @@ following rules: .. code:: ipython3 - import platform - - %pip install -q "openvino>=2023.1.0" opencv-python tqdm - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "openvino>=2024.4.0" opencv-python tqdm "matplotlib>=3.4" .. code:: ipython3 diff --git a/docs/notebooks/music-generation-with-output.rst b/docs/notebooks/music-generation-with-output.rst index d1fc70cca19a6d..566aa1c87a941c 100644 --- a/docs/notebooks/music-generation-with-output.rst +++ b/docs/notebooks/music-generation-with-output.rst @@ -124,13 +124,13 @@ Imports .. parsed-literal:: - 2024-10-08 03:06:32.000424: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 03:06:32.034271: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-23 01:59:20.725760: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-23 01:59:20.759494: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
- 2024-10-08 03:06:32.663477: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + 2024-10-23 01:59:21.367845: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( @@ -170,11 +170,11 @@ generate a text-conditioned music sample. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/huggingface_hub/file_download.py:1142: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/huggingface_hub/file_download.py:797: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`. warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm. warnings.warn("torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.") @@ -229,7 +229,7 @@ vocabulary. It helps the model understand the context of a sentence. @@ -655,7 +655,7 @@ We can now infer the pipeline backed by OpenVINO models. diff --git a/docs/notebooks/nano-llava-multimodal-chatbot-with-output.rst b/docs/notebooks/nano-llava-multimodal-chatbot-with-output.rst index 78700602513056..082b6613456e28 100644 --- a/docs/notebooks/nano-llava-multimodal-chatbot-with-output.rst +++ b/docs/notebooks/nano-llava-multimodal-chatbot-with-output.rst @@ -15,8 +15,8 @@ OpenVINO. Additionally, we will optimize model using - `Prerequisites <#prerequisites>`__ -- `Load PyTorch model <#load-pytorch-model>`__ -- `Run PyTorch Model Inference <#run-pytorch-model-inference>`__ +- `Select Model <#select-model>`__ +- `Download PyTorch model <#download-pytorch-model>`__ - `Convert and Optimize model <#convert-and-optimize-model>`__ - `Convert model to OpenVINO IR @@ -24,7 +24,6 @@ OpenVINO. Additionally, we will optimize model using - `Compress Model weights to 4 and 8 bits using NNCF <#compress-model-weights-to-4-and-8-bits-using-nncf>`__ - `Image Encoder <#image-encoder>`__ - - `Text Embeddings <#text-embeddings>`__ - `Language Model <#language-model>`__ - `Prepare model inference @@ -52,252 +51,551 @@ Prerequisites .. code:: ipython3 - %pip install -q "torch>=2.1" "transformers>=4.40" "accelerate" "pillow" "gradio>=4.26" "openvino>=2024.1.0" "tqdm" "nncf>=2.10" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "torch>=2.1" "transformers>=4.40" "accelerate" "pillow" "gradio>=4.26" "tqdm" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "nncf>=2.13" + %pip install -q -U --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly openvino_tokenizers[transformers] "openvino>=2024.4.0" + %pip install -q "git+https://github.com/eaidova/optimum-intel.git@ea/minicpmv" .. parsed-literal:: ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.2.2+cpu which is incompatible. mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. + openvino-dev 2024.4.0 requires openvino==2024.4.0, but you have openvino 2024.5.0.dev20241014 which is incompatible. + openvino-genai 2024.4.0.0 requires openvino_tokenizers~=2024.4.0.0.dev, but you have openvino-tokenizers 2024.5.0.0.dev20241022 which is incompatible. + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. .. 
code:: ipython3 - from huggingface_hub import snapshot_download from pathlib import Path - - model_local_dir = Path("nanoLLaVA") - - if not model_local_dir.exists(): - snapshot_download(repo_id="qnguyen3/nanoLLaVA", local_dir=model_local_dir) - - modeling_file = model_local_dir / "modeling_llava_qwen2.py" - orig_modeling_file = model_local_dir / f"orig_{modeling_file.name}" - - - # model code depends from flash_attn package that may be problematic to load. Patch model code for avoiding import of this package - if not orig_modeling_file.exists(): - modeling_file.rename(orig_modeling_file) - with orig_modeling_file.open("r") as f: - content = f.read() - replacement_lines = [ - ("from flash_attn import flash_attn_func, flash_attn_varlen_func", ""), - ("from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input", ""), - (' _flash_supports_window_size = "window_size" in list(inspect.signature(flash_attn_func).parameters)', "pass"), - ] - - for replace_pair in replacement_lines: - content = content.replace(*replace_pair) - - with modeling_file.open("w") as f: - f.write(content) - + import requests + helper_file = Path("ov_nano_llava_helper.py") -.. parsed-literal:: - - Fetching 14 files: 0%| | 0/14 [00:00`__ +- `nanoLLaVA-1.5 `__ +You can select one from the provided options below. -.. parsed-literal:: +.. code:: ipython3 - configuration_llava_qwen2.py: 0%| | 0.00/8.87k [00:00`__ +library. For convenience, we will use OpenVINO integration with +HuggingFace Optimum. `Optimum +Intel `__ is the +interface between the Transformers and Diffusers libraries and the +different tools and libraries provided by Intel to accelerate end-to-end +pipelines on Intel architectures. +Among other use cases, Optimum Intel provides a simple interface to +optimize your Transformers and Diffusers models, convert them to the +OpenVINO Intermediate Representation (IR) format and run inference using +OpenVINO Runtime. ``optimum-cli`` provides command line interface for +model conversion and optimization. -Load PyTorch model ------------------- +General command format: +.. code:: bash + optimum-cli export openvino --model --task -For creating PyTorch model we should use ``from_pretrained`` method of -``AutoModelForCausalLM`` model class. Model weights are already -downloaded from HuggingFace hub using ``snapshot_download`` function on -previous step. +where task is task to export the model for, if not specified, the task +will be auto-inferred based on the model. You can find a mapping between +tasks and model classes in Optimum TaskManager +`documentation `__. +Additionally, you can specify weights compression using +``--weight-format`` argument with one of following options: ``fp32``, +``fp16``, ``int8`` and ``int4``. Fro int8 and int4 +`nncf `__ will be used for +weight compression. More details about model export provided in `Optimum +Intel +documentation `__. .. code:: ipython3 - import transformers - from transformers import AutoModelForCausalLM, AutoTokenizer - from PIL import Image - import warnings - - transformers.logging.set_verbosity_error() - warnings.filterwarnings("ignore") - - model = AutoModelForCausalLM.from_pretrained(model_local_dir, trust_remote_code=True) - tokenizer = AutoTokenizer.from_pretrained(model_local_dir, trust_remote_code=True) + if not converted_model_exists(ov_model_dir): + !optimum-cli export openvino --model {model_id} --task image-text-to-text --trust-remote-code --weight-format fp16 {ov_model_dir} .. 
parsed-literal:: - 2024-10-08 03:11:17.270186: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 03:11:17.304136: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-23 02:04:00.682228: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-23 02:04:00.715051: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 03:11:18.027701: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - -Run PyTorch Model Inference ---------------------------- - - - -.. code:: ipython3 - - import torch - import requests - - prompt = "Describe this image in detail" - - messages = [{"role": "user", "content": f"\n{prompt}"}] - text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) - - text_chunks = [tokenizer(chunk).input_ids for chunk in text.split("")] - input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0) - url = "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/8bf7d9f2-018a-4498-bec4-55f17c273ecc" - image = Image.open(requests.get(url, stream=True).raw) - image_tensor = model.process_images([image], model.config) - print(prompt) - image - - -.. 
parsed-literal:: - - Describe this image in detail - - + 2024-10-23 02:04:01.329449: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + Some weights of the model checkpoint at qnguyen3/nanoLLaVA were not used when initializing LlavaQwen2ForCausalLM: ['model.vision_tower.vision_tower.vision_model.embeddings.patch_embedding.bias', 'model.vision_tower.vision_tower.vision_model.embeddings.patch_embedding.weight', 'model.vision_tower.vision_tower.vision_model.embeddings.position_embedding.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias', 
'model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight', 
'model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias', 
..., 'model.vision_tower.vision_tower.vision_model.post_layernorm.bias', 'model.vision_tower.vision_tower.vision_model.post_layernorm.weight']
+ - This IS expected if you are initializing LlavaQwen2ForCausalLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
+ - This IS NOT expected if you are initializing LlavaQwen2ForCausalLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
+ /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/qnguyen3/nanoLLaVA/1ea99cffcf50a27c5f06fe5d22a07046aba0bffe/modeling_llava_qwen2.py:169: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
+ if attn_weights.size() != (batch_size, self.num_heads, q_len, k_v_seq_len):
+ /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/qnguyen3/nanoLLaVA/1ea99cffcf50a27c5f06fe5d22a07046aba0bffe/modeling_llava_qwen2.py:187: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
+ if attn_output.size() != (batch_size, self.num_heads, q_len, self.head_dim):
+ Unexpectedly found already patched module model.embed_tokens while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model.
+ Unexpectedly found already patched module model.layers.0.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model.
+ Unexpectedly found already patched module model.layers.0.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model.
+ Unexpectedly found already patched module model.layers.0.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model.
+ Unexpectedly found already patched module model.layers.0.self_attn.o_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model.
+ Unexpectedly found already patched module model.layers.0.mlp.gate_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model.
+ Unexpectedly found already patched module model.layers.0.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model.
+ Unexpectedly found already patched module model.layers.0.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model.
+ Unexpectedly found already patched module model.layers.1.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model.
+ ... (similar "Unexpectedly found already patched module ..." warnings are emitted for the remaining language model and vision tower submodules)
+ Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. 
+ Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. 
+ Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. 
+ Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. 
+ Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. 
+ Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. 
+ Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. 
+ Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. 
+ Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. 
+ Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. 
+ Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. 
+ Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.24.mlp.fc1 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.24.mlp.fc2 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. 
+ Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.25.mlp.fc1 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.25.mlp.fc2 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.mm_projector.0 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.mm_projector.2 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module lm_head while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:447: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. + or len(self.key_cache[layer_idx]) == 0 # the layer has no cache + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:116: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ if (input_shape[-1] > 1 or self.sliding_window is not None) and self.is_causal: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/exporters/onnx/model_patcher.py:307: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if past_key_values_length > 0: + /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/qnguyen3/nanoLLaVA/1ea99cffcf50a27c5f06fe5d22a07046aba0bffe/modeling_llava_qwen2.py:939: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if seq_len > self.max_seq_len_cached: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:432: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. + elif len(self.key_cache[layer_idx]) == 0: # fills previously skipped layers; checking for tensor causes errors + /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/qnguyen3/nanoLLaVA/1ea99cffcf50a27c5f06fe5d22a07046aba0bffe/modeling_llava_qwen2.py:1499: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if attention_mask.size() != (bsz, 1, q_len, kv_seq_len): + Unexpectedly found already patched module while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + OpenVINO and OpenVINO Tokenizers versions are not binary compatible. + OpenVINO version: 2024.5.0-16993 + OpenVINO Tokenizers version: 2024.5.0.0 + First 3 numbers should be the same. Update OpenVINO Tokenizers to compatible version. It is recommended to use the same day builds for pre-release version. To install both OpenVINO and OpenVINO Tokenizers release version perform: + pip install --force-reinstall openvino openvino-tokenizers + To update both OpenVINO and OpenVINO Tokenizers to the latest pre-release version perform: + pip install --pre -U openvino openvino-tokenizers --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + Tokenizer won't be converted. 
+ Traceback (most recent call last): + File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/bin/optimum-cli", line 10, in + sys.exit(main()) + File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/commands/optimum_cli.py", line 208, in main + service.run() + File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/commands/export/openvino.py", line 349, in run + main_export( + File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/exporters/openvino/__main__.py", line 416, in main_export + core = Core() + File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino_tokenizers/__init__.py", line 53, in new_core_init + self.add_extension(str(_ext_path)) # Core.add_extension doesn't support Path object + RuntimeError: Exception from src/inference/src/cpp/core.cpp:158: + Cannot add extension. Cannot find entry point to the extension library. This error happened: Cannot load library '/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino_tokenizers/lib/libopenvino_tokenizers.so': /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino_tokenizers/lib/libopenvino_tokenizers.so: undefined symbol: _ZNK2ov4Node17can_constant_foldERKSt6vectorINS_6OutputIS0_EESaIS3_EE -.. image:: nano-llava-multimodal-chatbot-with-output_files/nano-llava-multimodal-chatbot-with-output_7_1.png - - - -.. code:: ipython3 - - from transformers import TextStreamer - - streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True) - - output_ids = model.generate(input_ids, images=image_tensor, max_new_tokens=128, use_cache=True, streamer=streamer) - - -.. parsed-literal:: - - The image features a white, fluffy lamb, likely a lama, in the midst of a fire. The lamb's fluffy fur is a mix of white and black, and it has a unique pattern of black spots on its body. The lamb's eyes are a bright shade of blue, and its ears are also white. The lamb's mouth is open, revealing pink lips, adding a playful touch to its overall appearance. - The lamb's face is quite detailed, with features such as a small black eye, a small nose, and a black mouth. The lamb's face is also quite expressive, with its mouth open, revealing pink lips - - -Convert and Optimize model --------------------------- - - - -Our model conversion and optimization consist of following steps: 1. -Convert model to OpenVINO format and save it on disk. 2. Compress model -weights using NNCF - -Let’s consider each step more deeply. - -Convert model to OpenVINO IR format -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - - -Convert model to OpenVINO format using conversion helper function -defined bellow. We will use `OpenVINO Model Conversion -API `__ -for conversion PyTorch model to OpenVINO Intermediate Representation -format. 
``ov.convert_model`` function accepts PyTorch model instance and -example input for tracing and returns ready to use OpenVINO Model object -that can be compiled on device using ``core.compile_model`` or saved on -disk for next usage with help ``ov.save_model`` function. Depends from -generation step, model accepts different inputs and activates different -parts of pipeline. For preserving the same level of flexibility, we will -split model on parts: Image Encoder, Text Embeddings, Language Model and -convert each part separately. Compress Model weights to 4 and 8 bits using NNCF -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -337,22 +635,20 @@ prediction quality. More details about weights compression, can be found in `OpenVINO documentation `__. - **Note**: There is no speedup for INT4 compressed models on dGPU. - Please select below whether you would like to run INT4 weight compression instead of INT8 weight compression. .. code:: ipython3 import ipywidgets as widgets - + compression_mode = widgets.Dropdown( options=["INT4", "INT8"], value="INT4", description="Compression mode:", disabled=False, ) - + compression_mode @@ -366,101 +662,25 @@ compression instead of INT8 weight compression. .. code:: ipython3 - import gc - import warnings - import torch - import openvino as ov import nncf - from typing import Optional, Tuple - - warnings.filterwarnings("ignore") - - - def flattenize_inputs(inputs): - """ - Helper function for making nested inputs flattens - """ - flatten_inputs = [] - for input_data in inputs: - if input_data is None: - continue - if isinstance(input_data, (list, tuple)): - flatten_inputs.extend(flattenize_inputs(input_data)) - else: - flatten_inputs.append(input_data) - return flatten_inputs - - - def cleanup_torchscript_cache(): - """ - Helper for removing cached model representation - """ - torch._C._jit_clear_class_registry() - torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore() - torch.jit._state._clear_class_state() - - - def postprocess_converted_model( - ov_model, - example_input=None, - input_names=None, - output_names=None, - dynamic_shapes=None, - ): - """ - Helper function for appling postprocessing on converted model with updating input names, shapes and output names - acording to requested specification - """ - flatten_example_inputs = flattenize_inputs(example_input) if example_input else [] - - if input_names: - for inp_name, m_input, input_data in zip(input_names, ov_model.inputs, flatten_example_inputs): - input_node = m_input.get_node() - if input_node.element_type == ov.Type.dynamic: - m_input.get_node().set_element_type(ov.Type.f32) - shape = list(input_data.shape) - if dynamic_shapes is not None and inp_name in dynamic_shapes: - for k in dynamic_shapes[inp_name]: - shape[k] = -1 - input_node.set_partial_shape(ov.PartialShape(shape)) - m_input.get_tensor().set_names({inp_name}) - - if output_names: - for out, out_name in zip(ov_model.outputs, output_names): - out.get_tensor().set_names({out_name}) - ov_model.validate_nodes_and_infer_types() - return ov_model - - -.. parsed-literal:: - - INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino - + import openvino as ov -.. 
code:: ipython3 + core = ov.Core() if compression_mode.value == "INT4": - ov_out_path = Path("ov_nanollava/INT4_compressed_weights") + ov_compressed_model_dir = ov_model_dir.parent / "INT4" llava_wc_parameters = dict(mode=nncf.CompressWeightsMode.INT4_ASYM, group_size=128, ratio=0.8) else: - ov_out_path = Path("ov_nanollava/INT8_compressed_weights") + ov_compressed_model_dir = ov_model_dir.parent / "INT8" llava_wc_parameters = dict(mode=nncf.CompressWeightsMode.INT8) - + image_encoder_wc_parameters = dict(mode=nncf.CompressWeightsMode.INT8) - - ov_out_path.mkdir(exist_ok=True, parents=True) - model.config.save_pretrained(ov_out_path) - vision_tower = model.get_vision_tower() - if not vision_tower.is_loaded: - vision_tower.load_model() - - image_encoder_path = ov_out_path / "image_encoder.xml" - token_embedding_model_path = ov_out_path / "token_embed.xml" - model_path = ov_out_path / "llava_with_past.xml" - - model.eval() - model.config.use_cache = True - model.config.torchscript = True + + +.. parsed-literal:: + + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino + Image Encoder ~~~~~~~~~~~~~ @@ -469,60 +689,42 @@ Image Encoder Image Encoder is represented in nanoLLaVA by pretrained SigLIP model. Image encoder is responsible for encoding input images into embedding -space. +space. The code below demonstrates how to apply weight compression to the +image encoder model. .. code:: ipython3 - if not image_encoder_path.exists(): - model.forward = model.encode_images - with torch.no_grad(): - ov_model = ov.convert_model( - model, - example_input=torch.zeros((1, 3, 384, 384)), - input=[(-1, 3, 384, 384)], - ) - if image_encoder_wc_parameters is not None: - print("Applying weight compression to image encoder") - ov_model = nncf.compress_weights(ov_model, **image_encoder_wc_parameters) - ov.save_model(ov_model, image_encoder_path) - cleanup_torchscript_cache() - del ov_model - gc.collect() - print("Image Encoder model successfully converted") - - -.. parsed-literal:: - - WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. - - -.. parsed-literal:: - - [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - - -.. parsed-literal:: + import gc - WARNING:nncf:NNCF provides best results with torch==2.4.*, while current torch version is 2.2.2+cpu. If you encounter issues, consider switching to torch==2.4.* + compressed_vision_encoder_path = ov_compressed_model_dir / "openvino_vision_embeddings_model.xml" + vision_encoder_path = ov_model_dir / "openvino_vision_embeddings_model.xml" + if not compressed_vision_encoder_path.exists(): + ov_vision_encoder = core.read_model(vision_encoder_path) + ov_compressed_vision_encoder = nncf.compress_weights(ov_vision_encoder, **image_encoder_wc_parameters) + ov.save_model(ov_compressed_vision_encoder, compressed_vision_encoder_path) + del ov_compressed_vision_encoder + del ov_vision_encoder + gc.collect(); .. parsed-literal:: - huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
- To disable this warning, you can either: - - Avoid using `tokenizers` before the fork if possible - - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/quantization/quantize_model.py:432: FutureWarning: `CompressWeightsMode.INT8` is deprecated. Please, use `CompressWeightsMode.INT8_ASYM` as value instead. + warning_deprecated( + 2024-10-23 02:04:33.280788: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-23 02:04:33.314985: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2024-10-23 02:04:33.946816: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: - Applying weight compression to image encoder INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 100% (159 / 159) │ 100% (159 / 159) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (159 / 159) │ 100% (159 / 159) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -538,39 +740,6 @@ space. -.. parsed-literal:: - - Image Encoder model successfully converted - - -Text Embeddings -~~~~~~~~~~~~~~~ - - - -In LLMs, input embedding is a part of language model, but for LLaVA the -first step hidden state produced by this model part should be integrated -with image embeddings into common embedding space. For ability to reuse -this model part and avoid introduction of extra llm model instance, we -will use it separately. - -.. code:: ipython3 - - if not token_embedding_model_path.exists(): - with torch.no_grad(): - ov_model = ov.convert_model(model.get_model().embed_tokens, example_input=torch.ones((1, 10), dtype=torch.long)) - ov.save_model(ov_model, token_embedding_model_path) - cleanup_torchscript_cache() - del ov_model - gc.collect() - print("Token Embedding model successfully converted") - - -.. parsed-literal:: - - Token Embedding model successfully converted - - Language Model ~~~~~~~~~~~~~~ @@ -579,71 +748,22 @@ Language Model Language Model is responsible for generation answer in LLaVA. This part is very similar to standard LLM for text generation. Our model uses `Qwen/Qwen1.5-0.5B `__ as base -LLM. 
To optimize the generation process and use memory more efficiently, -HuggingFace transformers API provides a mechanism for caching model -state externally using ``use_cache=True`` parameter and -``past_key_values`` argument in inputs and outputs. With the cache, the -model saves the hidden state once it has been computed. The model only -computes the one for the most recently generated output token at each -time step, re-using the saved ones for hidden tokens. This reduces the -generation complexity from :math:`O(n^3)` to :math:`O(n^2)` for a -transformer model. With this option, the model gets the previous step’s -hidden states (cached attention keys and values) as input and -additionally provides hidden states for the current step as output. It -means for all next iterations, it is enough to provide only a new token -obtained from the previous step and cached key values to get the next -token prediction. +LLM. .. code:: ipython3 - if not model_path.exists(): - model.forward = super(type(model), model).forward - example_input = {"attention_mask": torch.ones([2, 10], dtype=torch.int64), "position_ids": torch.tensor([[8, 9], [8, 9]], dtype=torch.int64)} - - dynamic_shapes = { - "input_embeds": {0: "batch_size", 1: "seq_len"}, - "attention_mask": {0: "batch_size", 1: "prev_seq_len + seq_len"}, - "position_ids": {0: "batch_size", 1: "seq_len"}, - } - input_embeds = torch.zeros((2, 2, model.config.hidden_size)) - - input_names = ["attention_mask", "position_ids"] - output_names = ["logits"] - - past_key_values = [] - for i in range(model.config.num_hidden_layers): - kv = [torch.randn([2, model.config.num_key_value_heads, 8, model.config.hidden_size // model.config.num_attention_heads]) for _ in range(2)] - past_key_values.append(kv) - input_names.extend([f"past_key_values.{i}.key", f"past_key_values.{i}.value"]) - output_names.extend([f"present.{i}.key", f"present.{i}.value"]) - dynamic_shapes[input_names[-2]] = {0: "batch_size", 2: "seq_len"} - dynamic_shapes[input_names[-1]] = {0: "batch_size", 2: "seq_len"} - - example_input["past_key_values"] = past_key_values - example_input["inputs_embeds"] = input_embeds - input_names.append("inputs_embeds") - dynamic_shapes["inputs_embeds"] = {0: "batch_size", 1: "seq_len"} - ov_model = ov.convert_model(model, example_input=example_input) - ov_model = postprocess_converted_model( - ov_model, example_input=example_input.values(), input_names=input_names, output_names=output_names, dynamic_shapes=dynamic_shapes - ) - - if llava_wc_parameters is not None: - print("Applying weight compression to second stage LLava model") - ov_model = nncf.compress_weights(ov_model, **llava_wc_parameters) - ov.save_model(ov_model, model_path) - cleanup_torchscript_cache() - del ov_model - gc.collect() - - print("LLaVA model successfully converted") - del model - gc.collect(); - + compressed_llm_path = ov_compressed_model_dir / "openvino_language_model.xml" + llm_path = ov_model_dir / "openvino_language_model.xml" -.. parsed-literal:: + if not compressed_llm_path.exists(): + ov_llm = core.read_model(llm_path) + ov_compressed_llm = nncf.compress_weights(ov_llm, **llava_wc_parameters) + ov.save_model(ov_compressed_llm, compressed_llm_path) + del ov_compressed_llm + del ov_llm + gc.collect() - Applying weight compression to second stage LLava model + copy_model_files(ov_model_dir, ov_compressed_model_dir) @@ -662,13 +782,13 @@ token prediction. .. 
parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 47% (48 / 169) │ 20% (47 / 168) │ - ├───────────────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ int4_asym │ 53% (121 / 169) │ 80% (121 / 168) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 47% (48 / 169) │ 20% (47 / 168) │ + ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ + │ 4 │ 53% (121 / 169) │ 80% (121 / 168) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -684,365 +804,24 @@ token prediction. -.. parsed-literal:: - - LLaVA model successfully converted - - Prepare model inference pipeline -------------------------------- -``OVLlavaQwen2ForCausalLM`` class provides ease-to-use interface for -using model in generation scenario. It is based on -``transformers.generation.GenerationMixin`` that gives us opportunity to -reuse all reach capabilities for generation implemented in HuggingFace -Transformers library. More details about this interface can be found in -`HuggingFace -documentation `__. - -.. code:: ipython3 - - from transformers.generation import GenerationConfig, GenerationMixin - from transformers.modeling_outputs import CausalLMOutputWithPast - from transformers import AutoConfig - from transformers.image_processing_utils import BatchFeature, get_size_dict - from transformers.image_transforms import ( - convert_to_rgb, - normalize, - rescale, - resize, - to_channel_dimension_format, - ) - from transformers.image_utils import ( - ChannelDimension, - PILImageResampling, - to_numpy_array, - ) - import numpy as np - import torch - from typing import Dict - from functools import partial, reduce - - IGNORE_INDEX = -100 - IMAGE_TOKEN_INDEX = -200 - - - class ImageProcessor: - def __init__( - self, - image_mean=(0.5, 0.5, 0.5), - image_std=(0.5, 0.5, 0.5), - size=(384, 384), - crop_size: Dict[str, int] = None, - resample=PILImageResampling.BICUBIC, - rescale_factor=1 / 255, - data_format=ChannelDimension.FIRST, - ): - crop_size = crop_size if crop_size is not None else {"height": 384, "width": 384} - crop_size = get_size_dict(crop_size, default_to_square=True, param_name="crop_size") - - self.image_mean = image_mean - self.image_std = image_std - self.size = size - self.resample = resample - self.rescale_factor = rescale_factor - self.data_format = data_format - self.crop_size = crop_size - - def preprocess(self, images, return_tensors): - if isinstance(images, Image.Image): - images = [images] - else: - assert isinstance(images, list) - - transforms = [ - convert_to_rgb, - to_numpy_array, - partial(resize, size=self.size, resample=self.resample, data_format=self.data_format), - partial(rescale, scale=self.rescale_factor, data_format=self.data_format), - partial(normalize, mean=self.image_mean, std=self.image_std, data_format=self.data_format), 
- partial(to_channel_dimension_format, channel_dim=self.data_format, input_channel_dim=self.data_format), - ] - - images = reduce(lambda x, f: [*map(f, x)], transforms, images) - data = {"pixel_values": images} - - return BatchFeature(data=data, tensor_type=return_tensors) - - - class OVLlavaQwen2ForCausalLM(GenerationMixin): - def __init__(self, core, model_dir, device): - self.image_encoder = core.compile_model(model_dir / "image_encoder.xml", device) - self.embed_tokens = core.compile_model(model_dir / "token_embed.xml", device) - self.model = core.read_model(model_dir / "llava_with_past.xml") - self.input_names = {key.get_any_name(): idx for idx, key in enumerate(self.model.inputs)} - self.output_names = {key.get_any_name(): idx for idx, key in enumerate(self.model.outputs)} - self.key_value_input_names = [key for key in self.input_names if "key_values" in key] - self.key_value_output_names = [key for key in self.output_names if "present" in key] - compiled_model = core.compile_model(self.model, device) - self.request = compiled_model.create_infer_request() - self.config = AutoConfig.from_pretrained(model_dir) - self.generation_config = GenerationConfig.from_model_config(self.config) - self.main_input_name = "input_ids" - self.device = torch.device("cpu") - self.num_pkv = 2 - self.image_processor = ImageProcessor() - self._supports_cache_class = False - - def can_generate(self): - """Returns True to validate the check that the model using `GenerationMixin.generate()` can indeed generate.""" - return True - - def __call__( - self, - input_ids: torch.LongTensor, - images: torch.Tensor, - attention_mask: Optional[torch.LongTensor] = None, - position_ids: Optional[torch.LongTensor] = None, - past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, - **kwargs, - ) -> CausalLMOutputWithPast: - return self.forward(input_ids, images, attention_mask, position_ids, past_key_values) - - def forward( - self, - input_ids: torch.LongTensor, - images: torch.Tensor, - attention_mask: Optional[torch.LongTensor] = None, - position_ids: Optional[torch.LongTensor] = None, - past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, - **kwargs, - ) -> CausalLMOutputWithPast: - """General inference method""" - inputs = self.prepare_inputs_for_multimodal(input_ids, position_ids, attention_mask, past_key_values, images) - - # Run inference - self.request.start_async(inputs, share_inputs=True) - self.request.wait() - - logits = torch.from_numpy(self.request.get_tensor("logits").data) - - # Tuple of length equal to : number of layer * number of past_key_value per decoder layer (2 corresponds to the self-attention layer) - past_key_values = tuple(self.request.get_tensor(key).data for key in self.key_value_output_names) - # Tuple of tuple of length `n_layers`, with each tuple of length equal to 2 (k/v of self-attention) - - past_key_values = tuple(past_key_values[i : i + self.num_pkv] for i in range(0, len(past_key_values), self.num_pkv)) - return CausalLMOutputWithPast(logits=logits, past_key_values=past_key_values) - - def prepare_inputs_for_multimodal(self, input_ids, position_ids, attention_mask, past_key_values, images): - inputs = {} - if past_key_values is None: - past_key_values = self._dummy_past_key_values(input_ids.shape[0]) - else: - past_key_values = tuple(past_key_value for pkv_per_layer in past_key_values for past_key_value in pkv_per_layer) - inputs.update(zip(self.key_value_input_names, past_key_values)) - - if images is None or input_ids.shape[1] == 1: - target_shape = 
past_key_values[-1][-1].shape[-2] + 1 if past_key_values is not None else input_ids.shape[1] - attention_mask = torch.cat( - ( - attention_mask, - torch.ones((attention_mask.shape[0], target_shape - attention_mask.shape[1]), dtype=attention_mask.dtype, device=attention_mask.device), - ), - dim=1, - ) - position_ids = torch.sum(attention_mask, dim=1).unsqueeze(-1) - 1 - inputs_embeds = self.embed_tokens(input_ids)[0] - inputs["attention_mask"] = attention_mask.numpy() - inputs["position_ids"] = position_ids.numpy() - inputs["inputs_embeds"] = inputs_embeds - - return inputs - - if type(images) is list or images.ndim == 5: - concat_images = torch.cat([image for image in images], dim=0) - image_features = self.encode_images(concat_images) - split_sizes = [image.shape[0] for image in images] - image_features = torch.split(image_features, split_sizes, dim=0) - image_features = [x.flatten(0, 1).to(self.device) for x in image_features] - else: - image_features = self.encode_images(images).to(self.device) - - # Let's just add dummy tensors if they do not exist, - # it is a headache to deal with None all the time. - # But it is not ideal, and if you have a better idea, - # please open an issue / submit a PR, thanks. - labels = None - _attention_mask = attention_mask - if attention_mask is None: - attention_mask = torch.ones_like(input_ids, dtype=torch.bool) - else: - attention_mask = attention_mask.bool() - if position_ids is None: - position_ids = torch.arange(0, input_ids.shape[1], dtype=torch.long, device=input_ids.device) - if labels is None: - labels = torch.full_like(input_ids, IGNORE_INDEX) - - # remove the padding using attention_mask -- TODO: double check - input_ids = [cur_input_ids[cur_attention_mask] for cur_input_ids, cur_attention_mask in zip(input_ids, attention_mask)] - labels = [cur_labels[cur_attention_mask] for cur_labels, cur_attention_mask in zip(labels, attention_mask)] - - new_input_embeds = [] - new_labels = [] - cur_image_idx = 0 - for batch_idx, cur_input_ids in enumerate(input_ids): - num_images = (cur_input_ids == IMAGE_TOKEN_INDEX).sum() - if num_images == 0: - cur_image_features = image_features[cur_image_idx] - cur_input_embeds_1 = self.embed_tokens(cur_input_ids) - cur_input_embeds = torch.cat([cur_input_embeds_1, cur_image_features[0:0]], dim=0) - new_input_embeds.append(cur_input_embeds) - new_labels.append(labels[batch_idx]) - cur_image_idx += 1 - continue - - image_token_indices = [-1] + torch.where(cur_input_ids == IMAGE_TOKEN_INDEX)[0].tolist() + [cur_input_ids.shape[0]] - cur_input_ids_noim = [] - cur_labels = labels[batch_idx] - cur_labels_noim = [] - for i in range(len(image_token_indices) - 1): - cur_input_ids_noim.append(cur_input_ids[image_token_indices[i] + 1 : image_token_indices[i + 1]]) - cur_labels_noim.append(cur_labels[image_token_indices[i] + 1 : image_token_indices[i + 1]]) - split_sizes = [x.shape[0] for x in cur_labels_noim] - cur_input_embeds = torch.from_numpy(self.embed_tokens(torch.cat(cur_input_ids_noim).unsqueeze(0))[0])[0] - cur_input_embeds_no_im = torch.split(cur_input_embeds, split_sizes, dim=0) - cur_new_input_embeds = [] - cur_new_labels = [] - - for i in range(num_images + 1): - cur_new_input_embeds.append(cur_input_embeds_no_im[i]) - cur_new_labels.append(cur_labels_noim[i]) - if i < num_images: - cur_image_features = image_features[cur_image_idx] - cur_image_idx += 1 - cur_new_input_embeds.append(cur_image_features) - cur_new_labels.append(torch.full((cur_image_features.shape[0],), IGNORE_INDEX, device=cur_labels.device, 
dtype=cur_labels.dtype)) - - cur_new_input_embeds = torch.cat(cur_new_input_embeds) - cur_new_labels = torch.cat(cur_new_labels) - - new_input_embeds.append(cur_new_input_embeds) - new_labels.append(cur_new_labels) - - # Truncate sequences to max length as image embeddings can make the sequence longer - tokenizer_model_max_length = getattr(self.config, "tokenizer_model_max_length", None) - if tokenizer_model_max_length is not None: - new_input_embeds = [x[:tokenizer_model_max_length] for x in new_input_embeds] - new_labels = [x[:tokenizer_model_max_length] for x in new_labels] - - # Combine them - max_len = max(x.shape[0] for x in new_input_embeds) - batch_size = len(new_input_embeds) - - new_input_embeds_padded = [] - new_labels_padded = torch.full((batch_size, max_len), IGNORE_INDEX, dtype=new_labels[0].dtype, device=new_labels[0].device) - attention_mask = torch.zeros((batch_size, max_len), dtype=attention_mask.dtype, device=attention_mask.device) - position_ids = torch.zeros((batch_size, max_len), dtype=position_ids.dtype, device=position_ids.device) - - for i, (cur_new_embed, cur_new_labels) in enumerate(zip(new_input_embeds, new_labels)): - cur_len = cur_new_embed.shape[0] - if getattr(self.config, "tokenizer_padding_side", "right") == "left": - new_input_embeds_padded.append( - torch.cat( - (torch.zeros((max_len - cur_len, cur_new_embed.shape[1]), dtype=cur_new_embed.dtype, device=cur_new_embed.device), cur_new_embed), dim=0 - ) - ) - if cur_len > 0: - new_labels_padded[i, -cur_len:] = cur_new_labels - attention_mask[i, -cur_len:] = True - position_ids[i, -cur_len:] = torch.arange(0, cur_len, dtype=position_ids.dtype, device=position_ids.device) - else: - new_input_embeds_padded.append( - torch.cat( - (cur_new_embed, torch.zeros((max_len - cur_len, cur_new_embed.shape[1]), dtype=cur_new_embed.dtype, device=cur_new_embed.device)), dim=0 - ) - ) - if cur_len > 0: - new_labels_padded[i, :cur_len] = cur_new_labels - attention_mask[i, :cur_len] = True - position_ids[i, :cur_len] = torch.arange(0, cur_len, dtype=position_ids.dtype, device=position_ids.device) - - new_input_embeds = torch.stack(new_input_embeds_padded, dim=0) - attention_mask = attention_mask.to(dtype=_attention_mask.dtype) - inputs["inputs_embeds"] = new_input_embeds.numpy() - inputs["attention_mask"] = attention_mask.numpy() - inputs["position_ids"] = position_ids.numpy() - - return inputs - - def prepare_inputs_for_generation(self, input_ids, past_key_values=None, **kwargs): - """ - This function is used during running GenerationMixin.generate for preparing model specific inputs for - each generation step - """ - past_len = 0 - if past_key_values is not None: - input_ids = input_ids[:, -1].unsqueeze(-1) - past_len = past_key_values[-1][-1].shape[-2] - attention_mask = kwargs.get( - "attention_mask", - torch.ones(input_ids.shape[0], input_ids.shape[1] + past_len), - ) - return { - "input_ids": input_ids, - "attention_mask": attention_mask, - "position_ids": kwargs.get("position_ids", None), - "past_key_values": past_key_values, - "images": kwargs.get("images", None), - } - - def _reorder_cache(self, past_key_values: Tuple[Tuple[torch.Tensor]], beam_idx: torch.Tensor) -> Tuple[Tuple[torch.Tensor]]: - """ - This function is used to re-order the `past_key_values` cache if [`~PreTrainedModel.beam_search`] or - [`~PreTrainedModel.beam_sample`] is called. - This is required to match `past_key_values` with the correct beam_idx at every generation step. 
- """ - - # from transformers.models.gpt2.modeling_gpt2.GPT2LMHeadModel._reorder_cache - return tuple(tuple(np.take(past_state, beam_idx, 0) for past_state in layer_past) for layer_past in past_key_values) - - def _dummy_past_key_values(self, batch_size): - pkv = [] - for input_name in self.key_value_input_names: - input_t = self.model.input(input_name) - input_shape = self.model.input(input_name).get_partial_shape() - input_shape[0] = batch_size - input_shape[2] = 0 - pkv.append(ov.Tensor(input_t.get_element_type(), input_shape.get_shape())) - - return pkv - - def encode_images(self, images): - return torch.from_numpy(self.image_encoder(images)[0]) - - def expand2square(self, pil_img, background_color): - width, height = pil_img.size - if width == height: - return pil_img - elif width > height: - result = Image.new(pil_img.mode, (width, width), background_color) - result.paste(pil_img, (0, (width - height) // 2)) - return result - else: - result = Image.new(pil_img.mode, (height, height), background_color) - result.paste(pil_img, ((height - width) // 2, 0)) - return result - - def process_images(self, images, model_cfg): - image_aspect_ratio = getattr(model_cfg, "image_aspect_ratio", None) - new_images = [] - if image_aspect_ratio == "pad": - for image in images: - image = self.expand2square(image, tuple(int(x * 255) for x in self.image_processor.image_mean)) - image = self.image_processor.preprocess(image, return_tensors="pt")["pixel_values"][0] - new_images.append(image) - else: - return self.image_processor(images, return_tensors="pt")["pixel_values"] - if all(x.shape == new_images[0].shape for x in new_images): - new_images = torch.stack(new_images, dim=0) - return new_images +OpenVINO integration with Optimum Intel provides ready-to-use API for +model inference that can be used for smooth integration with +transformers-based solutions. For loading pixtral model, we will use +``OVModelForVisualCausalLM`` class that have compatible interface with +Transformers Pixtral implementation. For loading a model, +``from_pretrained`` method should be used. It accepts path to the model +directory or model_id from HuggingFace hub (if model is not converted to +OpenVINO format, conversion will be triggered automatically). +Additionally, we can provide an inference device, quantization config +(if model has not been quantized yet) and device-specific OpenVINO +Runtime configuration. More details about model inference with Optimum +Intel can be found in +`documentation `__. Run OpenVINO Model Inference ---------------------------- @@ -1057,16 +836,16 @@ Select device .. code:: ipython3 import requests - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) open("notebook_utils.py", "w").write(r.text) - + from notebook_utils import device_widget - + device = device_widget("CPU", exclude=["NPU"]) - + device @@ -1078,22 +857,66 @@ Select device +Optimum Intel provides Transformers-like interface for inference +OpenVINO models that allows smooth integration into user application, +where you need just replace model class, other parts of pipeline - +preprocessing and postprocessing code remains the same. It means that we +can use the same tokenizer and image processor that provided with model. + .. 
code:: ipython3 - core = ov.Core() - - ov_model = OVLlavaQwen2ForCausalLM(core, ov_out_path, device.value) + from optimum.intel.openvino import OVModelForVisualCausalLM + from transformers import AutoConfig, AutoTokenizer, AutoProcessor, TextStreamer + + # prepare tokenizer + tokenizer = AutoTokenizer.from_pretrained(ov_compressed_model_dir, trust_remote_code=True) + + # prepare image processor + config = AutoConfig.from_pretrained(ov_compressed_model_dir, trust_remote_code=True) + processor = AutoProcessor.from_pretrained(config.mm_vision_tower) + + # initialize OpenVINO model inference class + ov_model = OVModelForVisualCausalLM.from_pretrained(ov_compressed_model_dir, device=device.value, trust_remote_code=True) .. code:: ipython3 + from ov_nano_llava_helper import process_images, process_text_input + from PIL import Image + + prompt = "Describe this image in detail" + + messages = [{"role": "user", "content": f"\n{prompt}"}] + text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) + url = "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/8bf7d9f2-018a-4498-bec4-55f17c273ecc" + image = Image.open(requests.get(url, stream=True).raw) + image_tensor = process_images(image, None, processor) + input_ids, attention_mask = process_text_input(text, tokenizer) + streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True) - - output_ids = ov_model.generate(input_ids, images=image_tensor, max_new_tokens=128, use_cache=True, streamer=streamer) + + display(image) + print(f"Question:\n{prompt}") + print("Answer:") + + output_ids = ov_model.generate(input_ids, attention_mask=attention_mask, images=image_tensor, max_new_tokens=128, use_cache=True, streamer=streamer) + + + +.. image:: nano-llava-multimodal-chatbot-with-output_files/nano-llava-multimodal-chatbot-with-output_22_0.png .. parsed-literal:: - The image features a white, fluffy lamb with a playful, cheerful expression. The lamb is positioned in the center of the image, and it appears to be in motion, as if it's running. The lamb's face is white and it has a cute, adorable expression. It has a pair of bright, black eyes that are wide open, and it has a small, pink nose. The lamb's ears are also white and are quite large. The lamb's legs are white and are positioned behind it. The lamb's tail is also white and is quite long. The lamb's body is fluffy and it covers a large portion of the + Setting `pad_token_id` to `eos_token_id`:None for open-end generation. + + +.. parsed-literal:: + + Question: + Describe this image in detail + Answer: + The image portrays a charming and playful scene featuring a white lama. This adorable creature has a playful expression, with its eyes sparkling with joy and its nose in a playful smile. It is adorned with cute, tiny eyes that add a playful touch to its face. The lama's ears are also quite noticeable, with one of them sporting a tiny pink button. The lama's body is covered in fluffy, white fur, and it has its hind legs visible, adding a sense of movement to the image. + The lama is surrounded by a vivid display of fire. The flames are bright and lively, with some areas appearing more intense Interactive demo @@ -1103,11 +926,11 @@ Interactive demo .. 
code:: ipython3 - import time from transformers import TextIteratorStreamer, StoppingCriteria from threading import Thread - - + import torch + + class KeywordsStoppingCriteria(StoppingCriteria): def __init__(self, keywords, tokenizer, input_ids): self.keywords = keywords @@ -1122,7 +945,7 @@ Interactive demo self.keyword_ids.append(torch.tensor(cur_keyword_ids)) self.tokenizer = tokenizer self.start_len = input_ids.shape[1] - + def call_for_batch(self, output_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool: offset = min(output_ids.shape[1] - self.start_len, self.max_keyword_len) self.keyword_ids = [keyword_id.to(output_ids.device) for keyword_id in self.keyword_ids] @@ -1135,14 +958,14 @@ Interactive demo if keyword in outputs: return True return False - + def __call__(self, output_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool: outputs = [] for i in range(output_ids.shape[0]): outputs.append(self.call_for_batch(output_ids[i].unsqueeze(0), scores)) return all(outputs) - - + + def bot_streaming(message, history): messages = [] if message["files"]: @@ -1151,7 +974,7 @@ Interactive demo for _, hist in enumerate(history): if isinstance(hist[0], tuple): image = hist[0][0] - + if len(history) > 0 and image is not None: messages.append({"role": "user", "content": f"\n{history[1][0]}"}) messages.append({"role": "assistant", "content": history[1][1]}) @@ -1172,29 +995,32 @@ Interactive demo messages.append({"role": "user", "content": f"\n{message['text']}"}) elif len(history) == 0 and image is None: messages.append({"role": "user", "content": message["text"]}) - + print(messages) image = Image.open(image).convert("RGB") text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) - text_chunks = [tokenizer(chunk).input_ids for chunk in text.split("")] - input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0) + image_tensor = process_images(image, None, processor) + input_ids, attention_mask = process_text_input(text, tokenizer) stop_str = "<|im_end|>" keywords = [stop_str] stopping_criteria = KeywordsStoppingCriteria(keywords, tokenizer, input_ids) streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True) - - image_tensor = ov_model.process_images([image], ov_model.config) generation_kwargs = dict( - input_ids=input_ids, images=image_tensor, streamer=streamer, max_new_tokens=128, stopping_criteria=[stopping_criteria], temperature=0.01 + input_ids=input_ids, + attention_mask=attention_mask, + images=image_tensor, + streamer=streamer, + max_new_tokens=128, + stopping_criteria=[stopping_criteria], + temperature=0.01, ) thread = Thread(target=ov_model.generate, kwargs=generation_kwargs) thread.start() - + buffer = "" for new_text in streamer: buffer += new_text generated_text_without_prompt = buffer[:] - time.sleep(0.04) yield generated_text_without_prompt .. code:: ipython3 @@ -1202,11 +1028,11 @@ Interactive demo if not Path("gradio_helper.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/nano-llava-multimodal-chatbot/gradio_helper.py") open("gradio_helper.py", "w").write(r.text) - + from gradio_helper import make_demo - + demo = make_demo(fn=bot_streaming) - + try: demo.launch(debug=False) except Exception: @@ -1219,17 +1045,12 @@ Interactive demo .. parsed-literal:: Running on local URL: http://127.0.0.1:7860 - - To create a public link, set `share=True` in `launch()`. 
- + To create a public link, set `share=True` in `launch()`. -.. code:: ipython3 - # please uncomment and run this cell for stopping gradio interface - # demo.close() diff --git a/docs/notebooks/nano-llava-multimodal-chatbot-with-output_files/nano-llava-multimodal-chatbot-with-output_7_1.jpg b/docs/notebooks/nano-llava-multimodal-chatbot-with-output_files/nano-llava-multimodal-chatbot-with-output_22_0.jpg similarity index 100% rename from docs/notebooks/nano-llava-multimodal-chatbot-with-output_files/nano-llava-multimodal-chatbot-with-output_7_1.jpg rename to docs/notebooks/nano-llava-multimodal-chatbot-with-output_files/nano-llava-multimodal-chatbot-with-output_22_0.jpg diff --git a/docs/notebooks/nano-llava-multimodal-chatbot-with-output_files/nano-llava-multimodal-chatbot-with-output_7_1.png b/docs/notebooks/nano-llava-multimodal-chatbot-with-output_files/nano-llava-multimodal-chatbot-with-output_22_0.png similarity index 100% rename from docs/notebooks/nano-llava-multimodal-chatbot-with-output_files/nano-llava-multimodal-chatbot-with-output_7_1.png rename to docs/notebooks/nano-llava-multimodal-chatbot-with-output_files/nano-llava-multimodal-chatbot-with-output_22_0.png diff --git a/docs/notebooks/object-detection-with-output.rst b/docs/notebooks/object-detection-with-output.rst index 06636ec17bf7e9..b1db0093158982 100644 --- a/docs/notebooks/object-detection-with-output.rst +++ b/docs/notebooks/object-detection-with-output.rst @@ -81,7 +81,6 @@ Install requirements Note: you may need to restart the kernel to use updated packages. ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. magika 0.5.1 requires numpy<2.0,>=1.24; python_version >= "3.8" and python_version < "3.9", but you have numpy 1.23.5 which is incompatible. - mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.2.2+cpu which is incompatible. mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. supervision 0.24.0 requires numpy<1.23.3,>=1.21.2; python_full_version <= "3.10.0", but you have numpy 1.23.5 which is incompatible. Note: you may need to restart the kernel to use updated packages. diff --git a/docs/notebooks/object-detection-with-output_files/object-detection-with-output_19_0.png b/docs/notebooks/object-detection-with-output_files/object-detection-with-output_19_0.png index 67d21a28902ef3..fb2b3b2abdf69f 100644 --- a/docs/notebooks/object-detection-with-output_files/object-detection-with-output_19_0.png +++ b/docs/notebooks/object-detection-with-output_files/object-detection-with-output_19_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bae0d18e0fe38cee76b5220d145c96e7d6d38b087cba46fc71db126eb3a88bb8 -size 175079 +oid sha256:1d300cd81ce6f44a5ee2ded6f14b739b0a8ecd8d5d2e487743cd085b92b3533b +size 175076 diff --git a/docs/notebooks/oneformer-segmentation-with-output.rst b/docs/notebooks/oneformer-segmentation-with-output.rst index cc94c7dbff9047..f0a9e5703e1644 100644 --- a/docs/notebooks/oneformer-segmentation-with-output.rst +++ b/docs/notebooks/oneformer-segmentation-with-output.rst @@ -63,14 +63,7 @@ Install required libraries .. 
code:: ipython3 - import platform - - %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "transformers>=4.26.0" "openvino>=2023.1.0" "nncf>=2.7.0" "gradio>=4.19" "torch>=2.1" scipy ipywidgets Pillow tqdm - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "transformers>=4.26.0" "openvino>=2023.1.0" "nncf>=2.7.0" "gradio>=4.19" "torch>=2.1" "matplotlib>=3.4" scipy ipywidgets Pillow tqdm Prepare the environment ----------------------- diff --git a/docs/notebooks/openvino-api-with-output.rst b/docs/notebooks/openvino-api-with-output.rst index 789fdeb53b1a45..a837b88ac6976b 100644 --- a/docs/notebooks/openvino-api-with-output.rst +++ b/docs/notebooks/openvino-api-with-output.rst @@ -201,7 +201,7 @@ notebooks. .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') @@ -250,7 +250,7 @@ points to the filename of an ONNX model. .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/segmentation.onnx') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/segmentation.onnx') @@ -310,7 +310,7 @@ without any conversion step. Pass the filename with extension to .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/inference.pdiparams') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/inference.pdiparams') @@ -354,7 +354,7 @@ TensorFlow models saved in frozen graph format can also be passed to .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.pb') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.pb') @@ -407,7 +407,7 @@ It is pre-trained model optimized to work with TensorFlow Lite. .. parsed-literal:: - Warning: Looks like you're using an outdated `kagglehub` version, please consider updating (latest version: 0.3.1) + Warning: Looks like you're using an outdated `kagglehub` version, please consider updating (latest version: 0.3.3) .. code:: ipython3 @@ -497,7 +497,7 @@ Information about the inputs and outputs of the model are in .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') @@ -703,7 +703,7 @@ produced data as values. .. 
parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') @@ -892,7 +892,7 @@ input shape. .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/segmentation.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/segmentation.bin') @@ -1044,7 +1044,7 @@ the cache. .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') @@ -1092,5 +1092,5 @@ measure the time it takes now. .. parsed-literal:: - Loading the network to the AUTO device took 0.07 seconds. + Loading the network to the AUTO device took 0.08 seconds. diff --git a/docs/notebooks/openvoice-with-output.rst b/docs/notebooks/openvoice-with-output.rst index 4e4a53150f7504..b73dd8059aa65b 100644 --- a/docs/notebooks/openvoice-with-output.rst +++ b/docs/notebooks/openvoice-with-output.rst @@ -99,13 +99,10 @@ Clone repository and install requirements Cloning into 'OpenVoice'... remote: Enumerating objects: 438, done. - remote: Counting objects: 100% (238/238), done. - remote: Compressing objects: 100% (113/113), done. - remote: Total 438 (delta 178), reused 127 (delta 125), pack-reused 200 (from 1) - Receiving objects: 100% (438/438), 3.82 MiB | 16.31 MiB/s, done. - Resolving deltas: 100% (221/221), done. + remote: Total 438 (delta 0), reused 0 (delta 0), pack-reused 438 (from 1) + Receiving objects: 100% (438/438), 3.84 MiB | 19.30 MiB/s, done. + Resolving deltas: 100% (207/207), done. ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.4.1 which is incompatible. mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. torchvision 0.17.2+cpu requires torch==2.2.2, but you have torch 2.4.1 which is incompatible. Note: you may need to restart the kernel to use updated packages. @@ -245,9 +242,9 @@ True .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. 
WeightNorm.apply(module, name, dim) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/api.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/api.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. checkpoint_dict = torch.load(ckpt_path, map_location=torch.device(self.device)) @@ -261,9 +258,9 @@ True .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/wavmark/__init__.py:16: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/wavmark/__init__.py:16: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. checkpoint = torch.load(resume_path, map_location=torch.device('cpu')) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/api.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/api.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. 
checkpoint_dict = torch.load(ckpt_path, map_location=torch.device(self.device)) @@ -413,50 +410,50 @@ documentation 0 No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:283: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:283: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert ( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:346: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:346: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! pad_length = max(length - (self.window_size + 1), 0) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:347: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:347: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! slice_start_position = max((self.window_size + 1) - length, 0) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:349: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:349: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if pad_length > 0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:114: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:114: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if torch.min(inputs) < left or torch.max(inputs) > right: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:119: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:119: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if min_bin_width * num_bins > 1.0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:121: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:121: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if min_bin_height * num_bins > 1.0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:171: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:171: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert (discriminant >= 0).all() - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Trace had nondeterministic nodes. Did you forget call .eval() on your model? Nodes: - %3293 : Float(1, 2, 43, strides=[86, 43, 1], requires_grad=0, device=cpu) = aten::randn(%3288, %3289, %3290, %3291, %3292) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:85:0 - %5559 : Float(1, 192, 151, strides=[28992, 1, 192], requires_grad=0, device=cpu) = aten::randn_like(%m_p, %5554, %5555, %5556, %5557, %5558) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:85:0 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Trace had nondeterministic nodes. Did you forget call .eval() on your model? Nodes: + %3293 : Float(1, 2, 43, strides=[86, 43, 1], requires_grad=0, device=cpu) = aten::randn(%3288, %3289, %3290, %3291, %3292) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86:0 + %5559 : Float(1, 192, 152, strides=[29184, 1, 192], requires_grad=0, device=cpu) = aten::randn_like(%m_p, %5554, %5555, %5556, %5557, %5558) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86:0 This may cause errors in trace checking. To disable trace checking, pass check_trace=False to torch.jit.trace() _check_trace( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: - The values for attribute 'shape' do not match: torch.Size([1, 1, 38912]) != torch.Size([1, 1, 39424]). + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: + The values for attribute 'shape' do not match: torch.Size([1, 1, 38656]) != torch.Size([1, 1, 39680]). 
_check_trace( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 2. of the traced function does not match the corresponding output of the Python function. Detailed error: - The values for attribute 'shape' do not match: torch.Size([1, 1, 152, 43]) != torch.Size([1, 1, 154, 43]). + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 2. of the traced function does not match the corresponding output of the Python function. Detailed error: + The values for attribute 'shape' do not match: torch.Size([1, 1, 151, 43]) != torch.Size([1, 1, 155, 43]). _check_trace( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 3. of the traced function does not match the corresponding output of the Python function. Detailed error: - The values for attribute 'shape' do not match: torch.Size([1, 1, 152]) != torch.Size([1, 1, 154]). + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 3. of the traced function does not match the corresponding output of the Python function. Detailed error: + The values for attribute 'shape' do not match: torch.Size([1, 1, 151]) != torch.Size([1, 1, 155]). _check_trace( - 2024-10-08 03:16:11.904034: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-23 02:08:53.401718: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 100% (199 / 199) │ 100% (199 / 199) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (199 / 199) │ 100% (199 / 199) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -479,27 +476,27 @@ documentation )`. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:836.) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/modules/module.py:1562: UserWarning: A window was not provided. A rectangular window will be applied,which is known to cause spectral leakage. Other windows such as torch.hann_window or torch.hamming_window can are recommended to reduce spectral leakage.To suppress this warning and use a rectangular window, explicitly set `window=torch.ones(n_fft, device=)`. 
(Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:836.) return forward_call(\*args, \*\*kwargs) @@ -716,7 +713,7 @@ Load speaker embeddings .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/functional.py:666: UserWarning: stft with return_complex=False is deprecated. In a future pytorch release, stft will return complex tensors for all inputs, and return_complex=False will raise an error. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/functional.py:666: UserWarning: stft with return_complex=False is deprecated. In a future pytorch release, stft will return complex tensors for all inputs, and return_complex=False will raise an error. Note: you can still call torch.view_as_real on the complex output to recover the old return format. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:873.) return _VF.stft(input, n_fft, hop_length, win_length, window, # type: ignore[attr-defined] @@ -871,7 +868,7 @@ And finally, run voice tone conversion with OpenVINO optimized model @@ -889,7 +886,7 @@ And finally, run voice tone conversion with OpenVINO optimized model @@ -1078,7 +1075,7 @@ voice tone conversion online. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/gradio/components/dropdown.py:100: UserWarning: The `max_choices` parameter is ignored when `multiselect` is False. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/gradio/components/dropdown.py:100: UserWarning: The `max_choices` parameter is ignored when `multiselect` is False. warnings.warn( diff --git a/docs/notebooks/optical-character-recognition-with-output.rst b/docs/notebooks/optical-character-recognition-with-output.rst index 59f72ff2fd84c3..9efa8e8e7b87f2 100644 --- a/docs/notebooks/optical-character-recognition-with-output.rst +++ b/docs/notebooks/optical-character-recognition-with-output.rst @@ -61,25 +61,16 @@ Guide =2024.0.0" "onnx<1.16.2" torch torchvision pillow opencv-python --extra-index-url https://download.pytorch.org/whl/cpu - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "openvino-dev>=2024.0.0" "onnx<1.16.2" torch torchvision pillow opencv-python "matplotlib>=3.4" --extra-index-url https://download.pytorch.org/whl/cpu .. parsed-literal:: ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.2.2+cpu which is incompatible. mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. torchaudio 2.4.1+cpu requires torch==2.4.1, but you have torch 2.2.2+cpu which is incompatible. Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. Imports @@ -381,12 +372,12 @@ Converting text-recognition-resnet-fc… .. 
parsed-literal:: ========== Converting text-recognition-resnet-fc to ONNX - Conversion to ONNX command: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/omz_tools/internal_scripts/pytorch_to_onnx.py --model-path=/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/omz_tools/models/public/text-recognition-resnet-fc --model-path=model/public/text-recognition-resnet-fc --model-name=get_model --import-module=model '--model-param=file_config=r"model/public/text-recognition-resnet-fc/vedastr/configs/resnet_fc.py"' '--model-param=weights=r"model/public/text-recognition-resnet-fc/vedastr/ckpt/resnet_fc.pth"' --input-shape=1,1,32,100 --input-names=input --output-names=output --output-file=model/public/text-recognition-resnet-fc/resnet_fc.onnx + Conversion to ONNX command: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/omz_tools/internal_scripts/pytorch_to_onnx.py --model-path=/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/omz_tools/models/public/text-recognition-resnet-fc --model-path=model/public/text-recognition-resnet-fc --model-name=get_model --import-module=model '--model-param=file_config=r"model/public/text-recognition-resnet-fc/vedastr/configs/resnet_fc.py"' '--model-param=weights=r"model/public/text-recognition-resnet-fc/vedastr/ckpt/resnet_fc.pth"' --input-shape=1,1,32,100 --input-names=input --output-names=output --output-file=model/public/text-recognition-resnet-fc/resnet_fc.onnx ONNX check passed successfully. 
========== Converting text-recognition-resnet-fc to IR (FP16) - Conversion command: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/bin/mo --framework=onnx --output_dir=model/public/text-recognition-resnet-fc/FP16 --model_name=text-recognition-resnet-fc --input=input '--mean_values=input[127.5]' '--scale_values=input[127.5]' --output=output --input_model=model/public/text-recognition-resnet-fc/resnet_fc.onnx '--layout=input(NCHW)' '--input_shape=[1, 1, 32, 100]' --compress_to_fp16=True + Conversion command: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/bin/mo --framework=onnx --output_dir=model/public/text-recognition-resnet-fc/FP16 --model_name=text-recognition-resnet-fc --input=input '--mean_values=input[127.5]' '--scale_values=input[127.5]' --output=output --input_model=model/public/text-recognition-resnet-fc/resnet_fc.onnx '--layout=input(NCHW)' '--input_shape=[1, 1, 32, 100]' --compress_to_fp16=True [ INFO ] MO command line tool is considered as the legacy conversion API as of OpenVINO 2023.2 release. In 2025.0 MO command line tool and openvino.tools.mo.convert_model() will be removed. Please use OpenVINO Model Converter (OVC) or openvino.convert_model(). OVC represents a lightweight alternative of MO and provides simplified model conversion API. @@ -394,8 +385,8 @@ Converting text-recognition-resnet-fc… [ INFO ] Generated IR will be compressed to FP16. If you get lower accuracy, please consider disabling compression explicitly by adding argument --compress_to_fp16=False. Find more information about compression to FP16 at https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_FP16_Compression.html [ SUCCESS ] Generated IR version 11 model. - [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/optical-character-recognition/model/public/text-recognition-resnet-fc/FP16/text-recognition-resnet-fc.xml - [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/optical-character-recognition/model/public/text-recognition-resnet-fc/FP16/text-recognition-resnet-fc.bin + [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/optical-character-recognition/model/public/text-recognition-resnet-fc/FP16/text-recognition-resnet-fc.xml + [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/optical-character-recognition/model/public/text-recognition-resnet-fc/FP16/text-recognition-resnet-fc.bin diff --git a/docs/notebooks/optimize-preprocessing-with-output.rst b/docs/notebooks/optimize-preprocessing-with-output.rst index 2bfb2926bd44a8..9c5a60b553c26c 100644 --- a/docs/notebooks/optimize-preprocessing-with-output.rst +++ b/docs/notebooks/optimize-preprocessing-with-output.rst @@ -81,14 +81,8 @@ Settings .. 
code:: ipython3 - import platform - # Install openvino package - %pip install -q "openvino>=2023.1.0" opencv-python tqdm - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "openvino>=2023.1.0" opencv-python tqdm "matplotlib>=3.4" %pip install -q "tensorflow-macos>=2.5; sys_platform == 'darwin' and platform_machine == 'arm64' and python_version > '3.8'" # macOS M1 and M2 %pip install -q "tensorflow>=2.5; sys_platform == 'darwin' and platform_machine != 'arm64' and python_version > '3.8'" # macOS x86 @@ -103,7 +97,6 @@ Settings Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. Imports @@ -360,7 +353,7 @@ for mean/scale normalization. .. parsed-literal:: - + @@ -391,7 +384,7 @@ may be specified is input data .. parsed-literal:: - + @@ -429,7 +422,7 @@ then such conversion will be added explicitly. .. parsed-literal:: - + @@ -643,6 +636,6 @@ Compare performance .. parsed-literal:: - IR model in OpenVINO Runtime/CPU with manual image preprocessing: 0.0148 seconds per image, FPS: 67.77 - IR model in OpenVINO Runtime/CPU with preprocessing API: 0.0142 seconds per image, FPS: 70.46 + IR model in OpenVINO Runtime/CPU with manual image preprocessing: 0.0154 seconds per image, FPS: 65.10 + IR model in OpenVINO Runtime/CPU with preprocessing API: 0.0142 seconds per image, FPS: 70.53 diff --git a/docs/notebooks/paddle-ocr-webcam-with-output.rst b/docs/notebooks/paddle-ocr-webcam-with-output.rst index 6e0bcc263ed873..e5c426d1920f33 100644 --- a/docs/notebooks/paddle-ocr-webcam-with-output.rst +++ b/docs/notebooks/paddle-ocr-webcam-with-output.rst @@ -207,7 +207,7 @@ Download the Model for Text **Detection** .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-no… + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-no… .. parsed-literal:: @@ -253,7 +253,7 @@ Download the Model for Text **Recognition** .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-no… + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-no… .. 
parsed-literal:: diff --git a/docs/notebooks/paddle-ocr-webcam-with-output_files/paddle-ocr-webcam-with-output_30_0.png b/docs/notebooks/paddle-ocr-webcam-with-output_files/paddle-ocr-webcam-with-output_30_0.png index e6377775fd7b9b..43d5bfb39ed4e8 100644 --- a/docs/notebooks/paddle-ocr-webcam-with-output_files/paddle-ocr-webcam-with-output_30_0.png +++ b/docs/notebooks/paddle-ocr-webcam-with-output_files/paddle-ocr-webcam-with-output_30_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:972e260b6c702c732ac3d1cb7cb2d8e38c9fe8ea0bd7b8e783e74f5d5c8cc6f5 -size 591296 +oid sha256:e5f67b60f77e15fb0b7908ada4ff64ab506accc23f579c81193175e5044f8f3b +size 593679 diff --git a/docs/notebooks/paddle-to-openvino-classification-with-output.rst b/docs/notebooks/paddle-to-openvino-classification-with-output.rst index e3bb27d374c1aa..576878a578a323 100644 --- a/docs/notebooks/paddle-to-openvino-classification-with-output.rst +++ b/docs/notebooks/paddle-to-openvino-classification-with-output.rst @@ -63,7 +63,7 @@ Imports else: %pip install -q "paddlepaddle>=2.5.1" %pip install -q "paddleclas>=2.5.2" --no-deps - %pip install -q "prettytable" "ujson" "visualdl>=2.5.3" "faiss-cpu>=1.7.1" Pillow tqdm + %pip install -q "prettytable" "ujson" "visualdl>=2.5.3" "faiss-cpu>=1.7.1" Pillow tqdm "matplotlib>=3.4" # Install openvino package %pip install -q "openvino>=2023.1.0" @@ -89,11 +89,11 @@ Imports .. parsed-literal:: - --2024-10-08 03:19:19-- http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2.19_amd64.deb + --2024-10-23 02:12:15-- http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2.19_amd64.deb Resolving proxy-dmz.intel.com (proxy-dmz.intel.com)... 10.241.208.166 Connecting to proxy-dmz.intel.com (proxy-dmz.intel.com)|10.241.208.166|:911... connected. Proxy request sent, awaiting response... 404 Not Found - 2024-10-08 03:19:19 ERROR 404: Not Found. + 2024-10-23 02:12:15 ERROR 404: Not Found. dpkg: error: cannot access archive 'libssl1.1_1.1.1f-1ubuntu2.19_amd64.deb': No such file or directory @@ -124,8 +124,8 @@ Imports .. parsed-literal:: - 2024-10-08 03:19:21 INFO: Loading faiss with AVX512 support. - 2024-10-08 03:19:21 INFO: Successfully loaded faiss with AVX512 support. + 2024-10-23 02:12:17 INFO: Loading faiss with AVX512 support. + 2024-10-23 02:12:17 INFO: Successfully loaded faiss with AVX512 support. Settings @@ -209,7 +209,7 @@ inference on that image, and then show the top three prediction results. .. parsed-literal:: - [2024/10/08 03:19:44] ppcls WARNING: The current running environment does not support the use of GPU. CPU has been used instead. + [2024/10/23 02:12:43] ppcls WARNING: The current running environment does not support the use of GPU. CPU has been used instead. Labrador retriever, 0.75138 German short-haired pointer, 0.02373 Great Dane, 0.01848 @@ -275,7 +275,7 @@ clipping values. .. parsed-literal:: - 2024-10-08 03:19:45 WARNING: Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). + 2024-10-23 02:12:44 WARNING: Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). .. parsed-literal:: @@ -287,7 +287,7 @@ clipping values. .. parsed-literal:: - + @@ -462,7 +462,7 @@ Note that many optimizations are possible to improve the performance. .. 
parsed-literal:: - PaddlePaddle model on CPU: 0.0077 seconds per image, FPS: 130.66 + PaddlePaddle model on CPU: 0.0073 seconds per image, FPS: 137.19 PaddlePaddle result: Labrador retriever, 0.75138 @@ -523,7 +523,7 @@ select device from dropdown list for running inference using OpenVINO .. parsed-literal:: - OpenVINO IR model in OpenVINO Runtime (AUTO): 0.0026 seconds per image, FPS: 380.65 + OpenVINO IR model in OpenVINO Runtime (AUTO): 0.0026 seconds per image, FPS: 382.97 OpenVINO result: Labrador retriever, 0.74909 diff --git a/docs/notebooks/paint-by-example-with-output.rst b/docs/notebooks/paint-by-example-with-output.rst index ed3c0d2e0ab1d7..2f1371652c5750 100644 --- a/docs/notebooks/paint-by-example-with-output.rst +++ b/docs/notebooks/paint-by-example-with-output.rst @@ -58,7 +58,7 @@ This is the overall flow of the application: .. code:: ipython3 %pip install -q "torch>=2.1" torchvision --extra-index-url "https://download.pytorch.org/whl/cpu" - %pip install -q "diffusers>=0.25.0" "peft>=0.6.2" "openvino>=2023.2.0" "transformers>=4.25.1" ipywidgets opencv-python pillow "nncf>=2.7.0" "gradio==3.44.1" tqdm + %pip install -q "diffusers>=0.25.0" "peft>=0.6.2" "openvino>=2023.2.0" "transformers>=4.25.1" "matplotlib>=3.4" ipywidgets opencv-python pillow "nncf>=2.7.0" "gradio==3.44.1" tqdm Download the model from `HuggingFace Paint-by-Example `__. diff --git a/docs/notebooks/parler-tts-text-to-speech-with-output.rst b/docs/notebooks/parler-tts-text-to-speech-with-output.rst index 25e4d4bed03a7d..3bbb67d2232c99 100644 --- a/docs/notebooks/parler-tts-text-to-speech-with-output.rst +++ b/docs/notebooks/parler-tts-text-to-speech-with-output.rst @@ -75,7 +75,6 @@ Prerequisites Note: you may need to restart the kernel to use updated packages. ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. googleapis-common-protos 1.65.0 requires protobuf!=3.20.0,!=3.20.1,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0.dev0,>=3.20.2, but you have protobuf 3.19.6 which is incompatible. - mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.4.1+cpu which is incompatible. mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. onnx 1.16.1 requires protobuf>=3.20.2, but you have protobuf 3.19.6 which is incompatible. paddlepaddle 2.6.2 requires protobuf>=3.20.2; platform_system != "Windows", but you have protobuf 3.19.6 which is incompatible. @@ -118,11 +117,11 @@ Load the original model and inference .. parsed-literal:: - 2024-10-08 03:20:24.075446: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 03:20:24.108997: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-23 02:13:22.641328: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
+ 2024-10-23 02:13:22.675982: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. Flash attention 2 is not installed - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. WeightNorm.apply(module, name, dim) You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers The attention mask is not set and cannot be inferred from input because pad token is same as eos token.As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results. @@ -141,7 +140,7 @@ Load the original model and inference @@ -195,7 +194,7 @@ and Decoder (``ParlerTTSDecoder``). Lets convert them one by one. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4664: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4664: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( @@ -236,11 +235,11 @@ stage the model produces tokens during several runs. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/parler_tts/modeling_parler_tts.py:253: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/parler_tts/modeling_parler_tts.py:253: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if seq_len > self.weights.size(0): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/parler_tts/modeling_parler_tts.py:1599: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/parler_tts/modeling_parler_tts.py:1599: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if sequence_length != 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/parler_tts/modeling_parler_tts.py:802: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/parler_tts/modeling_parler_tts.py:802: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_output.size() != (bsz, self.num_heads, tgt_len, self.head_dim): @@ -411,7 +410,7 @@ and run inference. @@ -469,8 +468,6 @@ Interactive inference Running on local URL: http://127.0.0.1:7860 - Thanks for being a Gradio user! If you have questions or feedback, please join our Discord server and chat with us: https://discord.gg/feTf9x3ZSB - To create a public link, set `share=True` in `launch()`. diff --git a/docs/notebooks/person-tracking-with-output.rst b/docs/notebooks/person-tracking-with-output.rst index b559c680b5503e..2b07305615f09e 100644 --- a/docs/notebooks/person-tracking-with-output.rst +++ b/docs/notebooks/person-tracking-with-output.rst @@ -128,22 +128,14 @@ Guide =2024.0.0" - %pip install -q opencv-python requests scipy tqdm - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q opencv-python requests scipy tqdm "matplotlib>=3.4" .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. Imports @@ -372,7 +364,7 @@ Data Processing includes data preprocess and postprocess functions. - Data preprocess function is used to change the layout and shape of input data, according to requirement of the network input format. -- Data postprocess function is used to extract the useful information from +- Datapostprocess function is used to extract the useful information from network’s original output and visualize it. .. 
code:: ipython3 diff --git a/docs/notebooks/person-tracking-with-output_files/person-tracking-with-output_25_0.png b/docs/notebooks/person-tracking-with-output_files/person-tracking-with-output_25_0.png index 8293308ef73078..92224b9dd2d944 100644 --- a/docs/notebooks/person-tracking-with-output_files/person-tracking-with-output_25_0.png +++ b/docs/notebooks/person-tracking-with-output_files/person-tracking-with-output_25_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eb8c9f580a9b1dfe6c67e1ee3b4f0d25a254ef01ccdb10234899873f17a13093 -size 218731 +oid sha256:d40f6ee0940455d5f7692589affef1fe1e8b44d9a3ae36d46d9a1b70599a6698 +size 219519 diff --git a/docs/notebooks/phi-3-vision-with-output.rst b/docs/notebooks/phi-3-vision-with-output.rst index bd1afd6a47a8ed..568072711ff2d8 100644 --- a/docs/notebooks/phi-3-vision-with-output.rst +++ b/docs/notebooks/phi-3-vision-with-output.rst @@ -17,9 +17,8 @@ post `__ - -**Table of contents:** +precision using `NNCF `__ #### +Table of contents: - `Prerequisites <#prerequisites>`__ - `Select Model <#select-model>`__ @@ -89,10 +88,10 @@ Select Model -The tutorial supports the following models from Phi-3 model family: - -`Phi-3.5-vision-instruct `__ -- -`Phi-3-vision-128k-instruct `__ +The tutorial supports the following models from Phi-3 model family: + +- `Phi-3.5-vision-instruct `__ +- `Phi-3-vision-128k-instruct `__ You can select one from the provided options below. @@ -265,10 +264,10 @@ documentation 1 or self.sliding_window is not None) and self.is_causal: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/4a0d683eba9f1d0cbfb6151705d1ee73c25a80ca/modeling_phi3_v.py:444: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! seq_len = seq_len or torch.max(position_ids) + 1 /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/4a0d683eba9f1d0cbfb6151705d1ee73c25a80ca/modeling_phi3_v.py:445: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if seq_len > self.original_max_position_embeddings: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:85: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. op1 = operator(\*args, \*\*kwargs) /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/4a0d683eba9f1d0cbfb6151705d1ee73c25a80ca/modeling_phi3_v.py:683: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len): @@ -365,7 +364,7 @@ documentation =3.9.2, but you have protobuf 3.20.3 which is incompatible. - mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.2.2+cpu which is incompatible. mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. paddleclas 2.5.2 requires gast==0.3.3, but you have gast 0.4.0 which is incompatible. paddleclas 2.5.2 requires opencv-python==4.6.0.66, but you have opencv-python 4.10.0.84 which is incompatible. @@ -200,10 +199,10 @@ PhotoMaker to generate the original PhotoMaker pipeline. .. parsed-literal:: - 2024-10-08 03:26:26.755777: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 03:26:26.790097: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-23 02:19:25.748160: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-23 02:19:25.783265: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 03:26:27.482079: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-23 02:19:26.449413: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. 
code:: ipython3 @@ -224,15 +223,6 @@ PhotoMaker to generate the original PhotoMaker pipeline. .. parsed-literal:: Loading PhotoMaker components [1] id_encoder from [/opt/home/k8sworker/.cache/huggingface/hub/models--TencentARC--PhotoMaker/snapshots/f68f8e6309bf213d28d68230abff0ccc92de9f30]... - - -.. parsed-literal:: - - The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable. - - -.. parsed-literal:: - Loading PhotoMaker components [2] lora_weights from [/opt/home/k8sworker/.cache/huggingface/hub/models--TencentARC--PhotoMaker/snapshots/f68f8e6309bf213d28d68230abff0ccc92de9f30] @@ -401,20 +391,20 @@ output(text embeddings) which will be the input for U-Net model. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4664: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4664: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/photo-maker/PhotoMaker/photomaker/model.py:84: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/photo-maker/PhotoMaker/photomaker/model.py:84: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert class_tokens_mask.sum() == stacked_id_embeds.shape[0], f"{class_tokens_mask.sum()} != {stacked_id_embeds.shape[0]}" .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 100% (151 / 151) │ 100% (151 / 151) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (151 / 151) │ 100% (151 / 151) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -478,20 +468,20 @@ sequence of latent text embeddings. .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:86: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:86: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if input_shape[-1] > 1 or self.sliding_window is not None: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 100% (73 / 73) │ 100% (73 / 73) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (73 / 73) │ 100% (73 / 73) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -510,11 +500,11 @@ sequence of latent text embeddings. .. 
parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 100% (194 / 194) │ 100% (194 / 194) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (194 / 194) │ 100% (194 / 194) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -584,26 +574,26 @@ original Stable Diffusion XL model. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/unets/unet_2d_condition.py:1103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/unets/unet_2d_condition.py:1103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if dim % default_overall_up_factor != 0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:136: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:136: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:145: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:145: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if hidden_states.shape[0] >= 64: .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 100% (794 / 794) │ 100% (794 / 794) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (794 / 794) │ 100% (794 / 794) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -659,11 +649,11 @@ VAE decoder. .. 
parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 100% (40 / 40) │ 100% (40 / 40) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (40 / 40) │ 100% (40 / 40) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ diff --git a/docs/notebooks/pixart-with-output.rst b/docs/notebooks/pixart-with-output.rst index 631e392394cfed..ef008d1d82ee52 100644 --- a/docs/notebooks/pixart-with-output.rst +++ b/docs/notebooks/pixart-with-output.rst @@ -118,10 +118,10 @@ directly in latent space, achieving super fast inference with few steps. .. parsed-literal:: - 2024-10-08 03:34:29.221746: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 03:34:29.256826: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-23 02:27:23.824587: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-23 02:27:23.860019: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 03:34:29.928193: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-23 02:27:24.531762: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -134,6 +134,7 @@ directly in latent space, achieving super fast inference with few steps. Some weights of the model checkpoint were not used when initializing PixArtTransformer2DModel: ['caption_projection.y_embedding'] + You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 @@ -142,11 +143,6 @@ directly in latent space, achieving super fast inference with few steps. Loading checkpoint shards: 0%| | 0/4 [00:00. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. 
If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - - .. parsed-literal:: @@ -233,7 +229,7 @@ Convert text encoder .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4664: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4664: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( @@ -275,11 +271,11 @@ Convert transformer .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/embeddings.py:219: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/embeddings.py:219: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if self.height != height or self.width != width: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/attention_processor.py:682: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/attention_processor.py:682: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if current_length != target_length: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/attention_processor.py:697: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/attention_processor.py:697: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attention_mask.shape[0] < batch_size * head_size: @@ -304,9 +300,9 @@ Convert VAE decoder .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if hidden_states.shape[0] >= 64: @@ -452,7 +448,7 @@ And insert wrappers instances in the pipeline: .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. 
deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -567,7 +563,7 @@ To collect intermediate model inputs for calibration we should customize .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -617,13 +613,12 @@ layers and (2) activations of other layers. The steps are the following: INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino INFO:nncf:1 ignored nodes were found by types in the NNCFGraph - INFO:nncf:1 ignored nodes were found by types in the NNCFGraph INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 100% (290 / 290) │ 100% (290 / 290) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (290 / 290) │ 100% (290 / 290) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -1551,13 +1546,13 @@ applied to footprint reduction. .. 
parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 3% (3 / 194) │ 0% (0 / 191) │ - ├───────────────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ int4_sym │ 97% (191 / 194) │ 100% (191 / 191) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 3% (3 / 194) │ 0% (0 / 191) │ + ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ + │ 4 │ 97% (191 / 194) │ 100% (191 / 191) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -1576,13 +1571,13 @@ applied to footprint reduction. .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 98% (37 / 40) │ 0% (0 / 3) │ - ├───────────────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ int4_sym │ 2% (3 / 40) │ 100% (3 / 3) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 98% (37 / 40) │ 0% (0 / 3) │ + ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ + │ 4 │ 2% (3 / 40) │ 100% (3 / 3) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -1653,7 +1648,7 @@ pipelines. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. 
deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -1710,9 +1705,9 @@ pipelines, we use mean inference time on 3 samples. .. parsed-literal:: - FP16 pipeline: 40.403 seconds - Optimized pipeline: 54.895 seconds - Performance speed-up: 0.736 + FP16 pipeline: 40.277 seconds + Optimized pipeline: 50.624 seconds + Performance speed-up: 0.796 Interactive inference @@ -1772,6 +1767,8 @@ to launch the interactive demo. Running on local URL: http://127.0.0.1:7860 + Thanks for being a Gradio user! If you have questions or feedback, please join our Discord server and chat with us: https://discord.gg/feTf9x3ZSB + To create a public link, set `share=True` in `launch()`. diff --git a/docs/notebooks/pixtral-with-output.rst b/docs/notebooks/pixtral-with-output.rst index f380b99cc7ee38..577aedf8550655 100644 --- a/docs/notebooks/pixtral-with-output.rst +++ b/docs/notebooks/pixtral-with-output.rst @@ -60,7 +60,7 @@ Prerequisites .. code:: ipython3 %pip install -q "torch>=2.1" torchvision "pillow" "tqdm" "gradio>=4.36" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "git+https://github.com/eaidova/optimum-intel.git@ea/llava_model" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "git+https://github.com/huggingface/optimum-intel.git" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q "nncf>=2.13.0" "openvino>=2024.4" %pip install -q "transformers>=4.45.0" --extra-index-url https://download.pytorch.org/whl/cpu @@ -71,7 +71,6 @@ Prerequisites Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.2.2+cpu which is incompatible. mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. parler-tts 0.2 requires transformers<=4.43.3,>=4.43.0, but you have transformers 4.45.2 which is incompatible. Note: you may need to restart the kernel to use updated packages. @@ -157,27 +156,21 @@ documentation True + Loading checkpoint shards: 100%|██████████████████| 6/6 [00:01<00:00, 3.46it/s] We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and will be removed in v4.47. Please convert your cache or use an appropriate `Cache` class (https://huggingface.co/docs/transformers/kv_cache#legacy-cache-format) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:447: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:447: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. 
or len(self.key_cache[layer_idx]) == 0 # the layer has no cache - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:432: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:432: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. elif len(self.key_cache[layer_idx]) == 0: # fills previously skipped layers; checking for tensor causes errors Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32) - Using framework PyTorch: 2.2.2+cpu [ WARNING ] Unexpectedly found already patched module language_model.model.embed_tokens while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. [ WARNING ] Unexpectedly found already patched module language_model.model.layers.0.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. [ WARNING ] Unexpectedly found already patched module language_model.model.layers.0.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. @@ -460,47 +453,41 @@ documentation False [ WARNING ] Unexpectedly found already patched module while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. Export model to OpenVINO directly failed with: Config dummy inputs are not a subset of the model inputs: {'input'} vs {'args', 'kwargs'}. Model will be exported to ONNX - Using framework PyTorch: 2.2.2+cpu - Overriding 1 configuration item(s) - - use_cache -> False - Saving external data to one file... + Exporting tokenizers to OpenVINO is not supported for tokenizers version > 0.19. Please downgrade to tokenizers version <= 0.19 to export tokenizers to OpenVINO. 
INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 6% (1 / 281) │ 0% (0 / 280) │ - ├───────────────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ int4_asym │ 94% (280 / 281) │ 100% (280 / 280) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ - Applying Weight Compression ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% • 0:05:07 • 0:00:00 + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 6% (1 / 281) │ 0% (0 / 280) │ + ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ + │ 4 │ 94% (280 / 281) │ 100% (280 / 280) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + Applying Weight Compression ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% • 0:05:12 • 0:00:00 INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 6% (3 / 172) │ 0% (0 / 169) │ - ├───────────────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ int4_asym │ 94% (169 / 172) │ 100% (169 / 169) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 6% (3 / 172) │ 0% (0 / 169) │ + ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ + │ 4 │ 94% (169 / 172) │ 100% (169 / 169) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ Applying Weight Compression ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% • 0:00:12 • 0:00:00 INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 100% (1 / 1) │ 0% (0 / 0) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (1 / 1) │ 0% (0 / 0) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ Applying Weight 
Compression ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% • 0:00:02 • 0:00:00 @@ -561,13 +548,10 @@ Intel can be found in .. parsed-literal:: - 2024-10-08 04:29:37.124362: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 04:29:37.158372: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-23 03:22:21.803644: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-23 03:22:21.838426: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 04:29:37.816800: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - Compiling the Language model to CPU ... - Compiling the Text embeddings model to CPU ... - Compiling the vision_embeddings to CPU ... + 2024-10-23 03:22:22.499374: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. code:: ipython3 @@ -611,7 +595,7 @@ Intel can be found in .. parsed-literal:: - The unusual aspect of this image is the presence of a cat and a dog lying together peacefully inside a cardboard box. This is not a common sight, as cats and dogs are often perceived as being natural enemies or at least not inclined to share spaces closely. The image portrays a harmonious and playful interaction between the two animals, which challenges typical stereotypes about their relationship. + The unusual aspect of this image is that the cat is lying inside a cardboard box, which is not a typical setting for a cat. Cats are often known for their affinity for boxes, but it is still considered unusual to see a cat comfortably resting inside a box in a living room setting. The cat appears relaxed and content, which adds to the charm of the scene. The presence of a sofa in the background further emphasizes the domestic and cozy atmosphere of the image. 
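The mixed 8-/4-bit tables shown above are produced by NNCF data-free weight compression. As a rough sketch of the kind of call that generates such a report for the pixtral language model (the IR path, ratio, and group size here are illustrative assumptions, not values taken from the notebook):

.. code:: ipython3

    import nncf
    import openvino as ov

    core = ov.Core()
    # Hypothetical path to the exported language-model IR.
    lm_model = core.read_model("pixtral-ov/openvino_language_model.xml")

    # Asymmetric INT4 weight compression; the few layers excluded from the
    # ratio (for example, embeddings) are kept in INT8, which is what yields
    # the mixed 8/4-bit statistics printed by NNCF.
    compressed_model = nncf.compress_weights(
        lm_model,
        mode=nncf.CompressWeightsMode.INT4_ASYM,
        ratio=1.0,
        group_size=64,
    )
    ov.save_model(compressed_model, "pixtral-ov/openvino_language_model_int4.xml")

The notebook drives this step through its own conversion helpers, so the exact arguments may differ; the snippet only illustrates where the bitwidth table comes from.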
Interactive demo diff --git a/docs/notebooks/pose-estimation-with-output_files/pose-estimation-with-output_22_0.png b/docs/notebooks/pose-estimation-with-output_files/pose-estimation-with-output_22_0.png index acdee31b5fc986..555f528de6532e 100644 --- a/docs/notebooks/pose-estimation-with-output_files/pose-estimation-with-output_22_0.png +++ b/docs/notebooks/pose-estimation-with-output_files/pose-estimation-with-output_22_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c27bcedcb93b8775d73cdf416666ce2517ea493b556386f5f7ccd35d53ed15c3 -size 108140 +oid sha256:1909ffa4ca40de34e6ed63d20d6bbf34ec27667f31a937215382942546d817e7 +size 107972 diff --git a/docs/notebooks/pytorch-onnx-to-openvino-with-output.rst b/docs/notebooks/pytorch-onnx-to-openvino-with-output.rst index 906f8bbc4434de..88db2dcec22b0d 100644 --- a/docs/notebooks/pytorch-onnx-to-openvino-with-output.rst +++ b/docs/notebooks/pytorch-onnx-to-openvino-with-output.rst @@ -557,9 +557,9 @@ performance. .. parsed-literal:: - PyTorch model on CPU: 0.039 seconds per image, FPS: 25.34 - ONNX model in OpenVINO Runtime/AUTO: 0.018 seconds per image, FPS: 56.97 - OpenVINO IR model in OpenVINO Runtime/AUTO: 0.018 seconds per image, FPS: 55.62 + PyTorch model on CPU: 0.042 seconds per image, FPS: 23.58 + ONNX model in OpenVINO Runtime/AUTO: 0.017 seconds per image, FPS: 57.35 + OpenVINO IR model in OpenVINO Runtime/AUTO: 0.028 seconds per image, FPS: 36.13 **Show Device Information** @@ -588,7 +588,7 @@ References - `Torchvision `__ - `Pytorch ONNX Documentation `__ -- `PIP install openvino-dev `__ +- `PIP install openvino `__ - `OpenVINO ONNX support `__ - `Model Conversion API diff --git a/docs/notebooks/pytorch-post-training-quantization-nncf-with-output.rst b/docs/notebooks/pytorch-post-training-quantization-nncf-with-output.rst index d2b770f4402052..2c1a5c8d6e8107 100644 --- a/docs/notebooks/pytorch-post-training-quantization-nncf-with-output.rst +++ b/docs/notebooks/pytorch-post-training-quantization-nncf-with-output.rst @@ -159,7 +159,7 @@ Settings .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/pytorch-post-training-quantization-nncf/model/resnet50_fp32.pth') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/pytorch-post-training-quantization-nncf/model/resnet50_fp32.pth') @@ -449,15 +449,15 @@ I. Evaluate the loaded model .. 
parsed-literal:: - Test: [ 0/79] Time 0.290 (0.290) Acc@1 81.25 (81.25) Acc@5 92.19 (92.19) - Test: [10/79] Time 0.227 (0.240) Acc@1 56.25 (66.97) Acc@5 86.72 (87.50) - Test: [20/79] Time 0.232 (0.240) Acc@1 67.97 (64.29) Acc@5 85.16 (87.35) - Test: [30/79] Time 0.243 (0.239) Acc@1 53.12 (62.37) Acc@5 77.34 (85.33) - Test: [40/79] Time 0.251 (0.240) Acc@1 67.19 (60.86) Acc@5 90.62 (84.51) - Test: [50/79] Time 0.234 (0.240) Acc@1 60.16 (60.80) Acc@5 88.28 (84.42) - Test: [60/79] Time 0.242 (0.242) Acc@1 66.41 (60.46) Acc@5 86.72 (83.79) - Test: [70/79] Time 0.240 (0.242) Acc@1 52.34 (60.21) Acc@5 80.47 (83.33) - * Acc@1 60.740 Acc@5 83.960 Total time: 18.879 + Test: [ 0/79] Time 0.324 (0.324) Acc@1 81.25 (81.25) Acc@5 92.19 (92.19) + Test: [10/79] Time 0.320 (0.265) Acc@1 56.25 (66.97) Acc@5 86.72 (87.50) + Test: [20/79] Time 0.256 (0.262) Acc@1 67.97 (64.29) Acc@5 85.16 (87.35) + Test: [30/79] Time 0.262 (0.260) Acc@1 53.12 (62.37) Acc@5 77.34 (85.33) + Test: [40/79] Time 0.253 (0.260) Acc@1 67.19 (60.86) Acc@5 90.62 (84.51) + Test: [50/79] Time 0.256 (0.259) Acc@1 60.16 (60.80) Acc@5 88.28 (84.42) + Test: [60/79] Time 0.253 (0.258) Acc@1 66.41 (60.46) Acc@5 86.72 (83.79) + Test: [70/79] Time 0.256 (0.258) Acc@1 52.34 (60.21) Acc@5 80.47 (83.33) + * Acc@1 60.740 Acc@5 83.960 Total time: 20.127 Test accuracy of FP32 model: 60.740 @@ -500,10 +500,10 @@ Guide `__ - -**Table of contents:** +`NNCF `__ #### Table of +contents: - `Prerequisites <#prerequisites>`__ - `Convert and Optimize model <#convert-and-optimize-model>`__ @@ -79,11 +78,11 @@ Prerequisites from pathlib import Path import requests - + if not Path("ov_qwen2_audio_helper.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/qwen2-audio/ov_qwen2_audio_helper.py") open("ov_qwen2_audio_helper.py", "w").write(r.text) - + if not Path("notebook_utils.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py") open("notebook_utils.py", "w").write(r.text) @@ -212,13 +211,13 @@ documentation `__ .. code:: ipython3 from ov_qwen2_audio_helper import OVQwen2AudioForConditionalGeneration - + # Uncomment below lines to see the model inference class code # OVQwen2AudioForConditionalGeneration?? .. 
code:: ipython3 from notebook_utils import device_widget - + device = device_widget(default="AUTO", exclude=["NPU"]) - + device @@ -423,20 +422,20 @@ Run model inference from transformers import AutoProcessor, TextStreamer import librosa import IPython.display as ipd - - + + processor = AutoProcessor.from_pretrained(model_dir) - + audio_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-Audio/audio/1272-128104-0000.flac" audio_chat_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-Audio/audio/guess_age_gender.wav" audio_file = Path(audio_url.split("/")[-1]) audio_chat_file = Path(audio_chat_url.split("/")[-1]) - + if not audio_file.exists(): r = requests.get(audio_url) with audio_file.open("wb") as f: f.write(r.content) - + if not audio_chat_file.exists(): r = requests.get(audio_chat_url) with audio_chat_file.open("wb") as f: @@ -458,14 +457,14 @@ Voice chat ], }, ] - + text = processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False) audios = [librosa.load(audio_chat_file, sr=processor.feature_extractor.sampling_rate)[0]] - + inputs = processor(text=text, audios=audios, return_tensors="pt", padding=True) display(ipd.Audio(audio_chat_file)) print("Answer:") - + generate_ids = ov_model.generate(**inputs, max_new_tokens=50, streamer=TextStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)) @@ -477,7 +476,7 @@ Voice chat .. raw:: html - + - + .. parsed-literal:: @@ -367,7 +347,7 @@ can be found in the Reference: Il blog è uno strumento che si prefigge di incoraggiare la collaborazione e sviluppare l'apprendimento degli studenti ben oltre la giornata scolastica normale. Result: The blog is our tool that is prefilled to encourage collaboration and develop the learning of the students and to attract a normal school class. - + Download and convert model to OpenVINO IR via Optimum Intel CLI --------------------------------------------------------------- @@ -416,46 +396,6 @@ documentation `__. if exit_code != 0: raise Exception("Failed to load and convert model!") - -.. parsed-literal:: - - INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino - - - -**Export command:** - - - -``optimum-cli export openvino --model openai/whisper-tiny --library transformers --task automatic-speech-recognition-with-past --framework pt whisper-tiny`` - - -.. parsed-literal:: - - 2024-10-08 06:43:10.758697: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - Moving the following attributes in the config to the generation config: {'max_length': 448, 'suppress_tokens': [1, 2, 7, 8, 9, 10, 14, 25, 26, 27, 28, 29, 31, 58, 59, 60, 61, 62, 63, 90, 91, 92, 93, 359, 503, 522, 542, 873, 893, 902, 918, 922, 931, 1350, 1853, 1982, 2460, 2627, 3246, 3253, 3268, 3536, 3846, 3961, 4183, 4667, 6585, 6647, 7273, 9061, 9383, 10428, 10929, 11938, 12033, 12331, 12562, 13793, 14157, 14635, 15265, 15618, 16553, 16604, 18362, 18956, 20075, 21675, 22520, 26130, 26161, 26435, 28279, 29464, 31650, 32302, 32470, 36865, 42863, 47425, 49870, 50254, 50258, 50358, 50359, 50360, 50361, 50362], 'begin_suppress_tokens': [220, 50257]}. You are seeing this warning because you've set generation parameters in the model config, as opposed to in the generation config. 
- Using framework PyTorch: 2.3.1+cpu - Overriding 1 configuration item(s) - - use_cache -> False - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/whisper/modeling_whisper.py:1071: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if input_features.shape[-1] != expected_seq_length: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/whisper/modeling_whisper.py:388: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if attn_output.size() != (bsz, self.num_heads, tgt_len, self.head_dim): - Using framework PyTorch: 2.3.1+cpu - Overriding 1 configuration item(s) - - use_cache -> True - Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.43.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/whisper/modeling_whisper.py:101: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if sequence_length != 1: - Using framework PyTorch: 2.3.1+cpu - Overriding 1 configuration item(s) - - use_cache -> True - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:447: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. - or len(self.key_cache[layer_idx]) == 0 # the layer has no cache - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:432: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. - elif len(self.key_cache[layer_idx]) == 0: # fills previously skipped layers; checking for tensor causes errors - - Run inference OpenVINO model with WhisperPipeline ------------------------------------------------- @@ -496,11 +436,9 @@ and put array as input. .. 
code:: ipython3 - sample = copy.deepcopy(en_raw_speech) + genai_result = ov_pipe.generate(en_raw_speech) - genai_result = ov_pipe.generate(sample) - - display(ipd.Audio(sample, rate=samplerate)) + display(ipd.Audio(en_raw_speech, rate=samplerate)) print(f"Result: {genai_result}") @@ -509,16 +447,61 @@ and put array as input. - + .. parsed-literal:: - Result: Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel. + Result: Colonel Jessif, did you order the code rate? You don't have to answer that question. I'll answer the question. You want answers? I think I'm entitled. You want answers? I want the truth. You can't handle the truth. + + +Whisper could provide a phrase-level timestamps for audio. Let’s try +this scenario, we will specify ``return_timestamps=True`` for +``generate`` method. + +``generate`` method with ``return_timestamps`` set to ``True`` will +return ``chunks``, which contain attributes: ``text``, ``start_ts`` and +``end_ts`` in seconds. +.. code:: ipython3 + + genai_result_timestamps = ov_pipe.generate(en_raw_speech, return_timestamps=True) + + for segment in genai_result_timestamps.chunks: + print(f"{segment.start_ts}sec. ---> {segment.end_ts}sec.") + print(f"{segment.text}\n") + + +.. parsed-literal:: + + 0.0sec. ---> 3.0sec. + Colonel Jessif, did you order the code rate? + + 3.0sec. ---> 4.5sec. + You don't have to answer that question. + + 4.5sec. ---> 6.5sec. + I'll answer the question. + + 6.5sec. ---> 8.0sec. + You want answers? + + 8.0sec. ---> 9.0sec. + I think I'm entitled. + + 9.0sec. ---> 10.0sec. + You want answers? + + 10.0sec. ---> 11.0sec. + I want the truth. + + 11.0sec. ---> 13.0sec. + You can't handle the truth. + + Let’s see how to work the ``translate`` task. It supports for multilingual models only. For that case we will specify ``language`` and @@ -542,7 +525,7 @@ format. } if model_type.value == "Multilingual models": - sample = copy.deepcopy(mls_example["audio"]) + sample = mls_example["audio"] genai_result_ml = ov_pipe.generate(sample["array"], max_new_tokens=100, task="translate", language=languages_genai[SAMPLE_LANG.value]) @@ -559,7 +542,7 @@ format. Your browser does not support the audio element. - + .. parsed-literal:: @@ -567,7 +550,7 @@ format. Reference: Il blog è uno strumento che si prefigge di incoraggiare la collaborazione e sviluppare l'apprendimento degli studenti ben oltre la giornata scolastica normale. Result: The blog is our tool that is prefilled to encourage collaboration and develop the learning of the students and to attract a normal school class. - + Compare performance PyTorch vs OpenVINO --------------------------------------- @@ -621,10 +604,10 @@ Compare performance PyTorch vs OpenVINO .. parsed-literal:: - Mean torch openai/whisper-tiny generation time: 0.273s - Mean openvino openai/whisper-tiny generation time: 0.166s - Performance openai/whisper-tiny openvino speedup: 1.650 - + Mean torch openai/whisper-tiny generation time: 0.624s + Mean openvino openai/whisper-tiny generation time: 0.344s + Performance openai/whisper-tiny openvino speedup: 1.814 + Quantization ------------ @@ -706,13 +689,7 @@ Below is an example of the whisper-tiny model Like the original PyTorch model, the OpenVINO model is also compatible with HuggingFace `pipeline `__ -interface for ``automatic-speech-recognition``. Pipeline can be used for -long audio transcription. Distil-Whisper uses a chunked algorithm to -transcribe long-form audio files. 
In practice, this chunked long-form -algorithm is 9x faster than the sequential algorithm proposed by OpenAI -in the Whisper paper. To enable chunking, pass the chunk_length_s -parameter to the pipeline. For Distil-Whisper, a chunk length of 15 -seconds is optimal. To activate batching, pass the argument batch_size. +interface for ``automatic-speech-recognition``. .. code:: ipython3 @@ -721,14 +698,6 @@ seconds is optimal. To activate batching, pass the argument batch_size. ov_model = OVModelForSpeechSeq2Seq.from_pretrained(str(model_path), device=device.value) ov_processor = AutoProcessor.from_pretrained(str(model_path)) - -.. parsed-literal:: - - Compiling the encoder to CPU ... - Compiling the decoder to CPU ... - Compiling the decoder to CPU ... - - Prepare calibration datasets ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -797,14 +766,6 @@ negligible. from datasets import load_dataset from tqdm.notebook import tqdm - def extract_input_features(sample): - input_features = processor( - sample["audio"]["array"], - sampling_rate=sample["audio"]["sampling_rate"], - return_tensors="pt", - ).input_features - return input_features - CALIBRATION_DATASET_SIZE = 30 @@ -868,127 +829,6 @@ negligible. ov_quantized_pipe = quantize(ov_model, CALIBRATION_DATASET_SIZE) - - -.. parsed-literal:: - - Collecting calibration data: 0%| | 0/30 [00:00 - + Your browser does not support the audio element. - + .. parsed-literal:: - Original : Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel. - Quantized: Mr Quilder is the apostle of the middle classes and we are glad to welcome his gospel. - + Original : Colonel Jessif, did you order the code rate? You don't have to answer that question. I'll answer the question. You want answers? I think I'm entitled. You want answers? I want the truth. You can't handle the truth. + Quantized: Don, I'll just, if you order the code right. You don have to answer that question. I'll answer the question. You want answers. I think I'm entitled you want answer. I want the truth. You can't handle the truth. You can't handle the truth. + Compare performance and accuracy of the original and quantized models ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1096,10 +934,10 @@ for Word Error Rate. .. parsed-literal:: - Whole pipeline performance speedup: 1.381 + Whole pipeline performance speedup: 1.499 Whisper transcription word accuracy. Original model: 82.88%. Quantized model: 84.13%. Accuracy drop: -1.25%. - + Interactive demo ---------------- @@ -1115,7 +953,7 @@ upload button) or record using your microphone. import requests if not Path("gradio_helper.py").exists(): - r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/distil-whisper-asr/gradio_helper.py") + r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/whisper-asr-genai/gradio_helper.py") open("gradio_helper.py", "w").write(r.text) from gradio_helper import make_demo, GradioPipeline @@ -1127,23 +965,9 @@ upload button) or record using your microphone. demo = make_demo(gr_pipeline) try: - demo.launch(debug=False) + demo.launch(debug=True) except Exception: - demo.launch(share=True, debug=False) + demo.launch(share=True, debug=True) # if you are launching remotely, specify server_name and server_port # demo.launch(server_name='your server name', server_port='server port in int') # Read more in the docs: https://gradio.app/docs/ - - -.. 
parsed-literal:: - - Running on local URL: http://127.0.0.1:7860 - - To create a public link, set `share=True` in `launch()`. - - - - - - - diff --git a/docs/notebooks/whisper-subtitles-generation-with-output.rst b/docs/notebooks/whisper-subtitles-generation-with-output.rst index ad479f160f3453..8bfbfa8a6e86a3 100644 --- a/docs/notebooks/whisper-subtitles-generation-with-output.rst +++ b/docs/notebooks/whisper-subtitles-generation-with-output.rst @@ -18,8 +18,11 @@ blog `__, `model card `__ and GitHub `repository `__. -In this notebook, we will use Whisper with OpenVINO to generate -subtitles in a sample video. Additionally, we will use +In this notebook, we will use the Whisper model with the `OpenVINO Generate +API `__ for `Whisper +automatic speech recognition +scenarios `__ +to generate subtitles in a sample video. Additionally, we will use `NNCF `__ improving model performance by INT8 quantization. Notebook contains the following steps: 1. Download the model. 2. Instantiate the PyTorch model pipeline. 3. @@ -75,11 +78,23 @@ Install dependencies. .. code:: ipython3 - %pip install -q "openvino>=2024.1.0" "nncf>=2.10.0" - %pip install -q "python-ffmpeg<=1.0.16" moviepy "onnx!=1.16.2" "git+https://github.com/huggingface/optimum-intel.git" "torch>=2.1" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "yt_dlp>=2024.8.6" soundfile librosa jiwer + %pip install -q "nncf>=2.13.0" + %pip install -q --pre -U "openvino" "openvino-tokenizers" "openvino-genai" --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + %pip install -q "python-ffmpeg<=1.0.16" "ffmpeg" "moviepy" "onnx!=1.16.2" "git+https://github.com/huggingface/optimum-intel.git" "torch>=2.1" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q -U "yt_dlp>=2024.8.6" soundfile librosa jiwer %pip install -q "gradio>=4.19" +.. code:: ipython3 + + import requests + from pathlib import Path + + if not Path("notebook_utils.py").exists(): + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) + Instantiate model ----------------- @@ -135,7 +150,7 @@ Whisper family. .. parsed-literal:: - Dropdown(description='Model:', index=6, options=('openai/whisper-large-v3', 'openai/whisper-large-v2', 'openai… + Dropdown(description='Model:', index=7, options=('openai/whisper-large-v3-turbo', 'openai/whisper-large-v3', '… @@ -144,45 +159,31 @@ Convert model to OpenVINO Intermediate Representation (IR) format using Optimum- -The Hugging Face Optimum API is a high-level API that enables us to -convert and quantize models from the Hugging Face Transformers library -to the OpenVINO™ IR format. For more details, refer to the `Hugging Face -Optimum -documentation `__. +The listed Whisper models are available for download via the `HuggingFace +hub `__. We will use the optimum-cli +interface to export them to OpenVINO Intermediate Representation +(IR) format. -Optimum Intel can be used to load optimized models from the `Hugging -Face Hub `__ and -create pipelines to run an inference with OpenVINO Runtime using Hugging -Face APIs. The Optimum Inference models are API compatible with Hugging -Face Transformers models. This means we just need to replace the -``AutoModelForXxx`` class with the corresponding ``OVModelForXxx`` -class. +The Optimum CLI interface supports exporting models to OpenVINO +(available starting with optimum-intel version 1.12).
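Before the general command format described next, here is a hypothetical sketch of what the concrete export call looks like for ``openai/whisper-tiny`` when driven from a notebook cell; the output directory name and the use of ``subprocess`` are assumptions, since the notebook runs the command through its own helper:

.. code:: ipython3

    import subprocess

    # Illustrative only: export whisper-tiny to OpenVINO IR with optimum-cli.
    subprocess.run(
        [
            "optimum-cli", "export", "openvino",
            "--model", "openai/whisper-tiny",
            "--task", "automatic-speech-recognition-with-past",
            "whisper-tiny",
        ],
        check=True,
    )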
General command format: -Below is an example of the whisper-tiny model +.. code:: bash -.. code:: diff + optimum-cli export openvino --model --task - -from transformers import AutoModelForSpeechSeq2Seq - +from optimum.intel.openvino import OVModelForSpeechSeq2Seq - from transformers import AutoTokenizer, pipeline - - model_id = "openai/whisper-tiny" - -model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id) - +model = OVModelForSpeechSeq2Seq.from_pretrained(model_id, export=True) - -Model class initialization starts with calling the ``from_pretrained`` -method. When downloading and converting the Transformers model, the -parameter ``export=True`` should be added. We can save the converted -model for the next usage with the ``save_pretrained`` method. -Alternatively, model conversion can be performed using Optimum-CLI -interface. You can find more details about Optimum-Intel and Optimum CLI -usage in this `tutorial `__. -The command bellow illustrates how to convert whisper using optimum cli. +where ``--model`` argument is model id from HuggingFace Hub or local +directory with model (saved using ``.save_pretrained`` method), +``--task`` is one of `supported +task `__ +that exported model should solve. For LLMs it will be +``automatic-speech-recognition-with-past``. If model initialization +requires to use remote code, ``--trust-remote-code`` flag additionally +should be passed. Full list of supported arguments available via +``--help`` For more details and examples of usage, please check `optimum +documentation `__. .. code:: ipython3 - from pathlib import Path - model_dir = model_id.value.split("/")[-1] if not Path(model_dir).exists(): @@ -201,24 +202,12 @@ Whisper model. whisper_pipeline.png -Preprocessing and post-processing are important in this model use. -``transformers.AutoProcessor`` class used for initialization -``WhisperProcessor`` is responsible for preparing audio input data for -the PyTorch model, converting it to Mel-spectrogram and decoding -predicted output token_ids into string using tokenizer. Tokenizers and -Processors are distributed with models also compatible with the OpenVINO -model. - -Like the original PyTorch model, the OpenVINO model is also compatible -with HuggingFace -`pipeline `__ -interface for ``automatic-speech-recognition``. Pipeline can be used for -long audio transcription. Distil-Whisper uses a chunked algorithm to -transcribe long-form audio files. In practice, this chunked long-form -algorithm is 9x faster than the sequential algorithm proposed by OpenAI -in the Whisper paper. To enable chunking, pass the chunk_length_s -parameter to the pipeline. For Distil-Whisper, a chunk length of 15 -seconds is optimal. To activate batching, pass the argument batch_size. +To simplify user experience we will use `OpenVINO Generate +API `__. +Firstly we will create pipeline with ``WhisperPipeline``. You can +construct it straight away from the folder with the converted model. It +will automatically load the ``model``, ``tokenizer``, ``detokenizer`` +and default ``generation configuration``. Select inference device ~~~~~~~~~~~~~~~~~~~~~~~ @@ -227,12 +216,6 @@ Select inference device select device from dropdown list for running inference using OpenVINO -.. code:: ipython3 - - import openvino as ov - - core = ov.Core() - .. 
code:: ipython3 import requests @@ -244,7 +227,7 @@ select device from dropdown list for running inference using OpenVINO from notebook_utils import device_widget - device = device_widget() + device = device_widget(default="CPU", exclude=["NPU"]) device @@ -253,78 +236,46 @@ select device from dropdown list for running inference using OpenVINO .. parsed-literal:: - Dropdown(description='Device:', index=3, options=('CPU', 'GPU.0', 'GPU.1', 'AUTO'), value='AUTO') + Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU') .. code:: ipython3 - from optimum.intel.openvino import OVModelForSpeechSeq2Seq - from transformers import AutoProcessor, pipeline - - ov_model = OVModelForSpeechSeq2Seq.from_pretrained(model_dir, device=device.value) - - processor = AutoProcessor.from_pretrained(model_dir) + import openvino_genai - pipe = pipeline( - "automatic-speech-recognition", - model=ov_model, - chunk_length_s=30, - tokenizer=processor.tokenizer, - feature_extractor=processor.feature_extractor, - ) + ov_pipe = openvino_genai.WhisperPipeline(str(model_dir), device=device.value) Run video transcription pipeline -------------------------------- -Now, we are ready to start transcription. We select a video from YouTube -that we want to transcribe. Be patient, as downloading the video may -take some time. +Now, we are ready to start transcription. Let’s load the video first. .. code:: ipython3 - import ipywidgets as widgets + from notebook_utils import download_file - VIDEO_LINK = "https://youtu.be/kgL5LBM-hFI" - link = widgets.Text( - value=VIDEO_LINK, - placeholder="Type link for video", - description="Video:", - disabled=False, - ) + output_file = Path("downloaded_video.mp4") - link - - + download_file( + "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/video/Sheldon%20Cooper%20Jim%20Parsons%20at%20Intels%20Lab.mp4", + filename=output_file.name, + ) .. parsed-literal:: - Text(value='https://youtu.be/kgL5LBM-hFI', description='Video:', placeholder='Type link for video') + 'downloaded_video.mp4' already exists. -.. code:: ipython3 - - from pathlib import Path - import yt_dlp - - print(f"Downloading video {link.value} started") - - output_file = Path("downloaded_video.mp4") - ydl_ops = {"format": "best[ext=mp4]", "outtmpl": output_file.as_posix()} - with yt_dlp.YoutubeDL(ydl_ops) as ydl: - ydl.download(link.value) - - print(f"Video saved to {output_file}") - .. parsed-literal:: - Downloading video https://youtu.be/kgL5LBM-hFI started - Video saved to downloaded_video.mp4 + PosixPath('/home/labuser/work/notebook/openvino_notebooks/notebooks/whisper-subtitles-generation/downloaded_video.mp4') + Select the task for the model: @@ -377,17 +328,31 @@ Select the task for the model: input_video.audio.write_audiofile(audio_file, verbose=False, logger=None) with open(audio_file, "rb") as f: inputs = f.read() - audio = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate) + audio = ffmpeg_read(inputs, 16000) return { "raw": audio, - "sampling_rate": pipe.feature_extractor.sampling_rate, + "sampling_rate": 16000, }, duration +Let’s run generation method. We will put input data as ``np array``. +Also we will specify ``task`` and ``return_timestamps=True`` options. If +task is ``translate``, you can place ``language`` option, for example +``<|fr|>`` for French or it would be detect automatically. We can set up +generation parameters in different ways. We can get default config with +``get_generation_config()``, setup parameters and put config directly to +``generate()``. 
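For illustration, a minimal sketch of that config-object style, assuming the ``ov_pipe`` created earlier and an ``inputs`` dictionary like the one returned by ``get_audio`` below; the attribute names follow the Whisper generation config of the Generate API and should be treated as an assumption:

.. code:: ipython3

    # A hedged sketch, not a cell from this notebook: tweak the default
    # generation config and pass it explicitly to generate().
    config = ov_pipe.get_generation_config()
    config.max_new_tokens = 100       # assumption: cap the transcript length
    config.return_timestamps = True   # request phrase-level timestamps
    config.task = "transcribe"        # or "translate"

    result = ov_pipe.generate(inputs["raw"], config)
    print(result)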
It’s also possible to specify the needed options just as +inputs in the ``generate()`` method and we will use this way. Then we +just run ``generate`` method and get the output in text format. + +``generate`` method with ``return_timestamps`` set to ``True`` will +return ``chunks``, which contain attributes: ``text``, ``start_ts`` and +``end_ts`` + .. code:: ipython3 inputs, duration = get_audio(output_file) - transcription = pipe(inputs, generate_kwargs={"task": task.value}, return_timestamps=True)["chunks"] + transcription = ov_pipe.generate(inputs["raw"], task=task.value, return_timestamps=True).chunks .. code:: ipython3 @@ -419,18 +384,19 @@ Select the task for the model: """ segment_lines = [] for idx, segment in enumerate(transcription): + timestamp = (segment.start_ts, segment.end_ts) # for the case where the model could not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. - if segment["timestamp"][1] is None: - segment["timestamp"] = (segment["timestamp"][0], filter_duration) + if segment.end_ts == -1: + timestamp[1] = filter_duration - if filter_duration is not None and (segment["timestamp"][0] >= math.floor(filter_duration) or segment["timestamp"][1] > math.ceil(filter_duration) + 1): + if filter_duration is not None and (timestamp[0] >= math.floor(filter_duration) or timestamp[1] > math.ceil(filter_duration) + 1): break segment_lines.append(str(idx + 1) + "\n") - time_start = format_timestamp(segment["timestamp"][0]) - time_end = format_timestamp(segment["timestamp"][1]) + time_start = format_timestamp(timestamp[0]) + time_end = format_timestamp(timestamp[1]) time_str = f"{time_start} --> {time_end}\n" segment_lines.append(time_str) - segment_lines.append(segment["text"] + "\n\n") + segment_lines.append(segment.text + "\n\n") return segment_lines "The results will be saved in the ``downloaded_video.srt`` file. SRT is @@ -457,7 +423,7 @@ Now let us see the results. .. parsed-literal:: - Video(value=b"\x00\x00\x00\x18ftypmp42\x00\x00\x00\x00isommp42\x00\x00:'moov\x00\x00\x00lmvhd...", height='800… + Video(value=b'\x00\x00\x00\x18ftypmp42\x00\x00\x00\x00isommp42\x00\x00Aimoov\x00\x00\x00lmvhd...', height='800… @@ -565,6 +531,42 @@ Please select below whether you would like to run Whisper quantization. %load_ext skip_kernel_extension +Let’s load converted OpenVINO model format using Optimum-Intel to easily +quantize it. + +Optimum Intel can be used to load optimized models from the `Hugging +Face Hub `__ or +local folder to create pipelines to run an inference with OpenVINO +Runtime using Hugging Face APIs. The Optimum Inference models are API +compatible with Hugging Face Transformers models. This means we just +need to replace the ``AutoModelForXxx`` class with the corresponding +``OVModelForXxx`` class. + +Below is an example of the whisper-tiny model + +.. code:: diff + + -from transformers import AutoModelForSpeechSeq2Seq + +from optimum.intel.openvino import OVModelForSpeechSeq2Seq + from transformers import AutoTokenizer, pipeline + + model_id = "openai/whisper-tiny" + -model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id) + +model = OVModelForSpeechSeq2Seq.from_pretrained(model_id, export=True) + +Like the original PyTorch model, the OpenVINO model is also compatible +with HuggingFace +`pipeline `__ +interface for ``automatic-speech-recognition``. + +.. 
code:: ipython3 + + from transformers import AutoProcessor + from optimum.intel.openvino import OVModelForSpeechSeq2Seq + + ov_model = OVModelForSpeechSeq2Seq.from_pretrained(model_dir, device=device.value) + processor = AutoProcessor.from_pretrained(model_dir) + Prepare calibration datasets ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -583,6 +585,9 @@ improves quantization quality. %%skip not $to_quantize.value from itertools import islice + from tqdm.notebook import tqdm + from datasets import load_dataset + from transformers import pipeline from optimum.intel.openvino.quantization import InferRequestWrapper @@ -629,28 +634,16 @@ negligible. import gc import shutil import nncf - from datasets import load_dataset - from tqdm.notebook import tqdm - - def extract_input_features(sample): - input_features = processor( - sample["audio"]["array"], - sampling_rate=sample["audio"]["sampling_rate"], - return_tensors="pt", - ).input_features - return input_features - + import openvino as ov - CALIBRATION_DATASET_SIZE = 50 + CALIBRATION_DATASET_SIZE = 30 quantized_model_path = Path(f"{model_dir}_quantized") def quantize(ov_model: OVModelForSpeechSeq2Seq, calibration_dataset_size: int): if not quantized_model_path.exists(): - encoder_calibration_data, decoder_calibration_data = collect_calibration_dataset( - ov_model, calibration_dataset_size - ) + encoder_calibration_data, decoder_calibration_data = collect_calibration_dataset(ov_model, calibration_dataset_size) print("Quantizing encoder") quantized_encoder = nncf.quantize( ov_model.encoder.model, @@ -658,7 +651,7 @@ negligible. subset_size=len(encoder_calibration_data), model_type=nncf.ModelType.TRANSFORMER, # Smooth Quant algorithm reduces activation quantization error; optimal alpha value was obtained through grid search - advanced_parameters=nncf.AdvancedQuantizationParameters(smooth_quant_alpha=0.50) + advanced_parameters=nncf.AdvancedQuantizationParameters(smooth_quant_alpha=0.80), ) ov.save_model(quantized_encoder, quantized_model_path / "openvino_encoder_model.xml") del quantized_encoder @@ -672,7 +665,7 @@ negligible. subset_size=len(decoder_calibration_data), model_type=nncf.ModelType.TRANSFORMER, # Smooth Quant algorithm reduces activation quantization error; optimal alpha value was obtained through grid search - advanced_parameters=nncf.AdvancedQuantizationParameters(smooth_quant_alpha=0.96) + advanced_parameters=nncf.AdvancedQuantizationParameters(smooth_quant_alpha=0.96), ) ov.save_model(quantized_decoder_with_past, quantized_model_path / "openvino_decoder_with_past_model.xml") del quantized_decoder_with_past @@ -685,218 +678,24 @@ negligible. 
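        # Editor's note (assumption): openvino_genai.WhisperPipeline loads the tokenizer,
        # detokenizer and configuration files from the model folder, which is why they are
        # copied next to the quantized encoder/decoder IRs below.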
shutil.copy(model_path / "generation_config.json", quantized_model_path / "generation_config.json") shutil.copy(model_path / "openvino_decoder_model.xml", quantized_model_path / "openvino_decoder_model.xml") shutil.copy(model_path / "openvino_decoder_model.bin", quantized_model_path / "openvino_decoder_model.bin") + shutil.copy(model_path / "openvino_tokenizer.xml", quantized_model_path / "openvino_tokenizer.xml") + shutil.copy(model_path / "openvino_tokenizer.bin", quantized_model_path / "openvino_tokenizer.bin") + shutil.copy(model_path / "openvino_detokenizer.xml", quantized_model_path / "openvino_detokenizer.xml") + shutil.copy(model_path / "openvino_detokenizer.bin", quantized_model_path / "openvino_detokenizer.bin") + shutil.copy(model_path / "tokenizer_config.json", quantized_model_path / "tokenizer_config.json") + shutil.copy(model_path / "tokenizer.json", quantized_model_path / "tokenizer.json") + shutil.copy(model_path / "vocab.json", quantized_model_path / "vocab.json") + shutil.copy(model_path / "preprocessor_config.json", quantized_model_path / "preprocessor_config.json") + shutil.copy(model_path / "special_tokens_map.json", quantized_model_path / "special_tokens_map.json") + shutil.copy(model_path / "normalizer.json", quantized_model_path / "normalizer.json") + shutil.copy(model_path / "merges.txt", quantized_model_path / "merges.txt") + shutil.copy(model_path / "added_tokens.json", quantized_model_path / "added_tokens.json") - quantized_ov_model = OVModelForSpeechSeq2Seq.from_pretrained(quantized_model_path, compile=False) - quantized_ov_model.to(device.value) - quantized_ov_model.compile() - return quantized_ov_model + quantized_ov_pipe = openvino_genai.WhisperPipeline(str(quantized_model_path), device=device.value) + return quantized_ov_pipe - ov_quantized_model = quantize(ov_model, CALIBRATION_DATASET_SIZE) - - - -.. parsed-literal:: - - Collecting calibration data: 0%| | 0/50 [00:00 00:00:05,000 - What's that? - - 2 - 00:00:05,000 --> 00:00:07,000 - Oh, wow. - - 3 - 00:00:09,000 --> 00:00:11,000 - Hello humans. - - 4 - 00:00:14,000 --> 00:00:15,000 - Focus on me. - - 5 - 00:00:15,000 --> 00:00:16,000 - Focus on the guard. - - 6 - 00:00:18,000 --> 00:00:20,000 - Don't tell anyone what you're seen in here. - - 7 - 00:00:22,000 --> 00:00:24,000 - Have you seen what's in there? - - 8 - 00:00:24,000 --> 00:00:25,000 - They have intel. - - 9 - 00:00:25,000 --> 00:00:27,000 - This is where it all changes. - - - - Compare performance and accuracy of the original and quantized models ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -975,9 +725,6 @@ and performance stand-points. To measure accuracy, we use ``1 - WER`` as a metric, where WER stands for Word Error Rate. -When measuring inference time, we do it separately for encoder and -decoder-with-past model forwards, and for the whole model inference too. - .. code:: ipython3 %%skip not $to_quantize.value @@ -986,69 +733,34 @@ decoder-with-past model forwards, and for the whole model inference too. 
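    # WER (Word Error Rate) counts the word substitutions, deletions and insertions needed
    # to turn a hypothesis into its reference, divided by the number of reference words;
    # the accuracy reported below is (1 - WER) * 100.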
from contextlib import contextmanager from jiwer import wer, wer_standardize - TEST_DATASET_SIZE = 50 - MEASURE_TIME = False - - @contextmanager - def time_measurement(): - global MEASURE_TIME - try: - MEASURE_TIME = True - yield - finally: - MEASURE_TIME = False - - def time_fn(obj, fn_name, time_list): - original_fn = getattr(obj, fn_name) - - def wrapper(*args, **kwargs): - if not MEASURE_TIME: - return original_fn(\*args, \*\*kwargs) - start_time = time.perf_counter() - result = original_fn(\*args, \*\*kwargs) - end_time = time.perf_counter() - time_list.append(end_time - start_time) - return result - - setattr(obj, fn_name, wrapper) def calculate_transcription_time_and_accuracy(ov_model, test_samples): - encoder_infer_times = [] - decoder_with_past_infer_times = [] whole_infer_times = [] - time_fn(ov_model, "generate", whole_infer_times) - time_fn(ov_model.encoder, "forward", encoder_infer_times) - time_fn(ov_model.decoder_with_past, "forward", decoder_with_past_infer_times) ground_truths = [] predictions = [] for data_item in tqdm(test_samples, desc="Measuring performance and accuracy"): - input_features = extract_input_features(data_item) - - with time_measurement(): - predicted_ids = ov_model.generate(input_features) - transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True) + start_time = time.perf_counter() + transcription = ov_model.generate(data_item["audio"]["array"], return_timestamps=True) + end_time = time.perf_counter() + whole_infer_times.append(end_time - start_time) ground_truths.append(data_item["text"]) - predictions.append(transcription[0]) + predictions.append(transcription.texts[0]) word_accuracy = (1 - wer(ground_truths, predictions, reference_transform=wer_standardize, hypothesis_transform=wer_standardize)) * 100 mean_whole_infer_time = sum(whole_infer_times) - mean_encoder_infer_time = sum(encoder_infer_times) - mean_decoder_with_time_infer_time = sum(decoder_with_past_infer_times) - return word_accuracy, (mean_whole_infer_time, mean_encoder_infer_time, mean_decoder_with_time_infer_time) + return word_accuracy, mean_whole_infer_time test_dataset = load_dataset("openslr/librispeech_asr", "clean", split="validation", streaming=True, trust_remote_code=True) test_dataset = test_dataset.shuffle(seed=42).take(TEST_DATASET_SIZE) test_samples = [sample for sample in test_dataset] - accuracy_original, times_original = calculate_transcription_time_and_accuracy(ov_model, test_samples) - accuracy_quantized, times_quantized = calculate_transcription_time_and_accuracy(ov_quantized_model, test_samples) - print(f"Encoder performance speedup: {times_original[1] / times_quantized[1]:.3f}") - print(f"Decoder with past performance speedup: {times_original[2] / times_quantized[2]:.3f}") - print(f"Whole pipeline performance speedup: {times_original[0] / times_quantized[0]:.3f}") + accuracy_original, times_original = calculate_transcription_time_and_accuracy(ov_pipe, test_samples) + accuracy_quantized, times_quantized = calculate_transcription_time_and_accuracy(quantized_ov_pipe, test_samples) + print(f"Whole pipeline performance speedup: {times_original / times_quantized:.3f}") print(f"Whisper transcription word accuracy. Original model: {accuracy_original:.2f}%. Quantized model: {accuracy_quantized:.2f}%.") print(f"Accuracy drop: {accuracy_original - accuracy_quantized:.2f}%.") @@ -1067,11 +779,9 @@ decoder-with-past model forwards, and for the whole model inference too. .. 
parsed-literal:: - Encoder performance speedup: 1.352 - Decoder with past performance speedup: 1.342 - Whole pipeline performance speedup: 1.350 - Whisper transcription word accuracy. Original model: 81.67%. Quantized model: 83.67%. - Accuracy drop: -1.99%. + Whole pipeline performance speedup: 1.452 + Whisper transcription word accuracy. Original model: 81.77%. Quantized model: 82.97%. + Accuracy drop: -1.20%. Interactive demo @@ -1081,18 +791,26 @@ Interactive demo .. code:: ipython3 - def transcribe(url, task, use_int8): - output_file = Path("downloaded_video.mp4") - ydl_ops = {"format": "best[ext=mp4]", "outtmpl": output_file.as_posix()} - with yt_dlp.YoutubeDL(ydl_ops) as ydl: - ydl.download(link.value) - inputs, duration = get_audio(output_file) - m_pipe = int8_pipe if use_int8 else pipe - transcription = m_pipe(inputs, generate_kwargs={"task": task.lower()}, return_timestamps=True)["chunks"] + def_config = ov_pipe.get_generation_config() + + + def transcribe(video_path, task, use_int8): + data_path = Path(video_path) + inputs, duration = get_audio(data_path) + m_pipe = quantized_ov_pipe if use_int8 else ov_pipe + + frame_num = len(inputs["raw"]) / 16000 + if frame_num > 30: + config = ov_pipe.get_generation_config() + chink_num = math.ceil(frame_num / 30) + config.max_length = chink_num * def_config.max_length + m_pipe.set_generation_config(config) + + transcription = m_pipe.generate(inputs["raw"], task=task.lower(), return_timestamps=True).chunks srt_lines = prepare_srt(transcription, duration) - with output_file.with_suffix(".srt").open("w") as f: + with data_path.with_suffix(".srt").open("w") as f: f.writelines(srt_lines) - return [str(output_file), str(output_file.with_suffix(".srt"))] + return [str(data_path), str(data_path.with_suffix(".srt"))] if not Path("gradio_helper.py").exists(): @@ -1101,7 +819,7 @@ Interactive demo from gradio_helper import make_demo - demo = make_demo(fn=transcribe, quantized=ov_quantized_model is not None) + demo = make_demo(fn=transcribe, quantized=ov_quantized_model is not None, sample_path=output_file) try: demo.launch(debug=False) diff --git a/docs/notebooks/wuerstchen-image-generation-with-output.rst b/docs/notebooks/wuerstchen-image-generation-with-output.rst index 8e61a42f359dad..342ddbafdc7edb 100644 --- a/docs/notebooks/wuerstchen-image-generation-with-output.rst +++ b/docs/notebooks/wuerstchen-image-generation-with-output.rst @@ -230,7 +230,8 @@ parameter to generate a less memory-demanding model. Text encoder model has 2 inputs: - ``input_ids``: vector of tokenized input sentence. Default tokenizer vector length is 77. -- ``attention_mask``: vector of same length as ``input_ids`` describing the attention mask. +- ``attention_mask``: vector of same length as ``input_ids`` describing + the attention mask. .. code:: ipython3 @@ -284,7 +285,6 @@ Decoder pipeline Decoder pipeline consists of 3 parts: decoder, text encoder and VQGAN. Decoder model is the WuerstchenDiffNeXt UNet decoder. Inputs are: - - ``x``: sample - ``r``: timestep - ``effnet``: interpolation block diff --git a/docs/notebooks/yolov11-instance-segmentation-with-output.rst b/docs/notebooks/yolov11-instance-segmentation-with-output.rst index 931465dba1e79b..9d9b5e6bd15c0e 100644 --- a/docs/notebooks/yolov11-instance-segmentation-with-output.rst +++ b/docs/notebooks/yolov11-instance-segmentation-with-output.rst @@ -143,7 +143,7 @@ Import required utility functions. The lower cell will download the .. 
parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg') @@ -216,14 +216,14 @@ Let us consider the examples: .. parsed-literal:: - 100%|██████████| 5.90M/5.90M [00:00<00:00, 25.3MB/s] + 100%|██████████| 5.90M/5.90M [00:00<00:00, 25.1MB/s] .. parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 480x640 3 bicycles, 2 cars, 1 motorcycle, 1 dog, 66.0ms - Speed: 2.1ms preprocess, 66.0ms inference, 2.7ms postprocess per image at shape (1, 3, 480, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 480x640 3 bicycles, 2 cars, 1 motorcycle, 1 dog, 66.4ms + Speed: 1.8ms preprocess, 66.4ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640) @@ -252,15 +252,15 @@ preserve dynamic shapes in the model. .. parsed-literal:: - Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.3.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) + Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.4.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) PyTorch: starting from 'yolo11n-seg.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) ((1, 116, 8400), (1, 32, 160, 160)) (5.9 MB) - OpenVINO: starting export with openvino 2024.5.0-16913-890f2e12c98... + OpenVINO: starting export with openvino 2024.5.0-16993-9c432a3641a... OpenVINO: export success ✅ 2.0s, saved as 'yolo11n-seg_openvino_model/' (6.0 MB) Export complete (2.2s) - Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization + Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization Predict: yolo predict task=segment model=yolo11n-seg_openvino_model imgsz=640 half Validate: yolo val task=segment model=yolo11n-seg_openvino_model imgsz=640 data=/ultralytics/ultralytics/cfg/datasets/coco.yaml half Visualize: https://netron.app @@ -331,7 +331,7 @@ Test on single image .. parsed-literal:: - Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.3.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) + Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.4.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) Loading yolo11n-seg_openvino_model for OpenVINO inference... Using OpenVINO LATENCY mode for batch=1 inference... @@ -345,8 +345,8 @@ Test on single image .. 
parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 640x640 3 bicycles, 2 cars, 1 dog, 23.1ms - Speed: 2.1ms preprocess, 23.1ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 640x640 3 bicycles, 2 cars, 1 dog, 23.2ms + Speed: 3.6ms preprocess, 23.2ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 640) @@ -668,8 +668,8 @@ on the image. .. parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 640x640 2 bicycles, 2 cars, 1 dog, 10.9ms - Speed: 2.0ms preprocess, 10.9ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 640x640 2 bicycles, 2 cars, 1 dog, 17.7ms + Speed: 2.1ms preprocess, 17.7ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 640) @@ -717,18 +717,18 @@ models. [ INFO ] Parsing input parameters [Step 2/11] Loading OpenVINO Runtime [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 19.71 ms + [ INFO ] Read model took 20.05 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [?,3,?,?] @@ -738,7 +738,7 @@ models. [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'x': [1,3,640,640] - [ INFO ] Reshape model took 5.12 ms + [ INFO ] Reshape model took 8.82 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] x (node: x) : u8 / [N,C,H,W] / [1,3,640,640] @@ -746,7 +746,7 @@ models. [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_8) : f32 / [...] / [1,116,8400] [ INFO ] input.255 (node: __module.model.23.cv4.2.1.act/aten::silu_/Swish_46) : f32 / [...] / [1,32,160,160] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 385.36 ms + [ INFO ] Compile model took 387.38 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -783,17 +783,17 @@ models. [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 37.32 ms + [ INFO ] First inference took 36.42 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 1794 iterations - [ INFO ] Duration: 15062.45 ms + [ INFO ] Count: 1788 iterations + [ INFO ] Duration: 15050.22 ms [ INFO ] Latency: - [ INFO ] Median: 50.09 ms - [ INFO ] Average: 50.21 ms - [ INFO ] Min: 43.19 ms - [ INFO ] Max: 65.81 ms - [ INFO ] Throughput: 119.10 FPS + [ INFO ] Median: 49.80 ms + [ INFO ] Average: 50.35 ms + [ INFO ] Min: 33.27 ms + [ INFO ] Max: 104.15 ms + [ INFO ] Throughput: 118.80 FPS .. code:: ipython3 @@ -808,18 +808,18 @@ models. [ INFO ] Parsing input parameters [Step 2/11] Loading OpenVINO Runtime [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 29.59 ms + [ INFO ] Read model took 29.13 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [1,3,640,640] @@ -837,7 +837,7 @@ models. [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_8) : f32 / [...] / [1,116,8400] [ INFO ] input.255 (node: __module.model.23.cv4.2.1.act/aten::silu_/Swish_46) : f32 / [...] / [1,32,160,160] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 608.94 ms + [ INFO ] Compile model took 594.13 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -874,17 +874,17 @@ models. [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 26.24 ms + [ INFO ] First inference took 27.63 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 3636 iterations - [ INFO ] Duration: 15046.89 ms + [ INFO ] Count: 3714 iterations + [ INFO ] Duration: 15026.92 ms [ INFO ] Latency: - [ INFO ] Median: 24.53 ms - [ INFO ] Average: 24.70 ms - [ INFO ] Min: 12.80 ms - [ INFO ] Max: 40.79 ms - [ INFO ] Throughput: 241.64 FPS + [ INFO ] Median: 23.95 ms + [ INFO ] Average: 24.14 ms + [ INFO ] Min: 17.70 ms + [ INFO ] Max: 39.05 ms + [ INFO ] Throughput: 247.16 FPS Other ways to optimize model diff --git a/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_46_0.png b/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_46_0.png index 32694d34889741..011e13859e8a3e 100644 --- a/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_46_0.png +++ b/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_46_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ceb4f1951cf92d0a2365750af32b945a90f65102311933328225f2dbd7b802c1 -size 496675 +oid sha256:e393b8edd8a20af1aaab6b69932bf0e715bf4f9c35814dd766dfa74ae27f2ae4 +size 493271 diff --git a/docs/notebooks/yolov11-keypoint-detection-with-output.rst b/docs/notebooks/yolov11-keypoint-detection-with-output.rst index cc830279de6753..97c549e6751dd0 100644 --- a/docs/notebooks/yolov11-keypoint-detection-with-output.rst +++ b/docs/notebooks/yolov11-keypoint-detection-with-output.rst @@ -143,7 +143,7 @@ Import required utility functions. The lower cell will download the .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/intel_rnb.jpg') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/intel_rnb.jpg') @@ -214,14 +214,14 @@ Let us consider the examples: .. parsed-literal:: - 100%|██████████| 5.97M/5.97M [00:00<00:00, 25.1MB/s] + 100%|██████████| 5.97M/5.97M [00:00<00:00, 25.3MB/s] .. parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/intel_rnb.jpg: 480x640 1 person, 56.4ms - Speed: 1.9ms preprocess, 56.4ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/intel_rnb.jpg: 480x640 1 person, 59.6ms + Speed: 2.1ms preprocess, 59.6ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 640) @@ -250,15 +250,15 @@ preserve dynamic shapes in the model. .. parsed-literal:: - Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.3.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) + Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.4.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) PyTorch: starting from 'yolo11n-pose.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 56, 8400) (6.0 MB) - OpenVINO: starting export with openvino 2024.5.0-16913-890f2e12c98... 
- OpenVINO: export success ✅ 1.9s, saved as 'yolo11n-pose_openvino_model/' (6.0 MB) + OpenVINO: starting export with openvino 2024.5.0-16993-9c432a3641a... + OpenVINO: export success ✅ 2.0s, saved as 'yolo11n-pose_openvino_model/' (6.0 MB) - Export complete (2.2s) - Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization + Export complete (2.1s) + Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization Predict: yolo predict task=pose model=yolo11n-pose_openvino_model imgsz=640 half Validate: yolo val task=pose model=yolo11n-pose_openvino_model imgsz=640 data=/ultralytics/ultralytics/cfg/datasets/coco-pose.yaml half Visualize: https://netron.app @@ -334,12 +334,12 @@ ready to check model prediction. .. parsed-literal:: - Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.3.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) + Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.4.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) Loading yolo11n-pose_openvino_model for OpenVINO inference... Using OpenVINO LATENCY mode for batch=1 inference... - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/intel_rnb.jpg: 640x640 1 person, 21.5ms - Speed: 2.5ms preprocess, 21.5ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/intel_rnb.jpg: 640x640 1 person, 19.9ms + Speed: 2.3ms preprocess, 19.9ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640) @@ -682,12 +682,12 @@ on the image. .. parsed-literal:: - Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.3.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) + Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.4.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) Loading yolo11n-pose_openvino_model for OpenVINO inference... Using OpenVINO LATENCY mode for batch=1 inference... - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/intel_rnb.jpg: 640x640 1 person, 31.0ms - Speed: 2.1ms preprocess, 31.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/intel_rnb.jpg: 640x640 1 person, 28.0ms + Speed: 2.0ms preprocess, 28.0ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640) @@ -737,18 +737,18 @@ models. [Step 2/11] Loading OpenVINO Runtime [ WARNING ] Default duration 120 seconds is used for unknown device AUTO [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. 
[Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 19.80 ms + [ INFO ] Read model took 19.55 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [?,3,?,?] @@ -757,14 +757,14 @@ models. [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'x': [1,3,640,640] - [ INFO ] Reshape model took 5.17 ms + [ INFO ] Reshape model took 8.54 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] x (node: x) : u8 / [N,C,H,W] / [1,3,640,640] [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_9) : f32 / [...] / [1,56,8400] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 338.05 ms + [ INFO ] Compile model took 329.35 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -801,17 +801,17 @@ models. [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 120000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 33.00 ms + [ INFO ] First inference took 33.67 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 17292 iterations - [ INFO ] Duration: 120045.86 ms + [ INFO ] Count: 17280 iterations + [ INFO ] Duration: 120049.75 ms [ INFO ] Latency: - [ INFO ] Median: 40.71 ms - [ INFO ] Average: 41.52 ms - [ INFO ] Min: 29.33 ms - [ INFO ] Max: 102.50 ms - [ INFO ] Throughput: 144.04 FPS + [ INFO ] Median: 40.74 ms + [ INFO ] Average: 41.55 ms + [ INFO ] Min: 24.20 ms + [ INFO ] Max: 98.48 ms + [ INFO ] Throughput: 143.94 FPS .. code:: ipython3 @@ -827,18 +827,18 @@ models. [ INFO ] Parsing input parameters [Step 2/11] Loading OpenVINO Runtime [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 29.03 ms + [ INFO ] Read model took 28.39 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [1,3,640,640] @@ -854,7 +854,7 @@ models. [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_9) : f32 / [...] / [1,56,8400] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 558.98 ms + [ INFO ] Compile model took 556.59 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -891,17 +891,17 @@ models. [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 31.29 ms + [ INFO ] First inference took 30.44 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 5184 iterations - [ INFO ] Duration: 15060.72 ms + [ INFO ] Count: 5160 iterations + [ INFO ] Duration: 15031.11 ms [ INFO ] Latency: - [ INFO ] Median: 34.51 ms - [ INFO ] Average: 34.67 ms - [ INFO ] Min: 21.44 ms - [ INFO ] Max: 51.10 ms - [ INFO ] Throughput: 344.21 FPS + [ INFO ] Median: 34.62 ms + [ INFO ] Average: 34.76 ms + [ INFO ] Min: 25.46 ms + [ INFO ] Max: 51.89 ms + [ INFO ] Throughput: 343.29 FPS Compare accuracy of the Original and Quantized Models diff --git a/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_43_0.png b/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_43_0.png index a03840fa0f4bec..f4e1de3947dc95 100644 --- a/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_43_0.png +++ b/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_43_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b6b92af78802b6615cc954b803042caa64f3969c1947652978beaaf6481a5104 -size 507485 +oid sha256:44dd0441baec6d73d3cc7552c26d2878c806514edacb3da332031733686c654b +size 507432 diff --git a/docs/notebooks/yolov11-object-detection-with-output.rst b/docs/notebooks/yolov11-object-detection-with-output.rst index 40256995ebd00f..d987f4148e7265 100644 --- a/docs/notebooks/yolov11-object-detection-with-output.rst +++ b/docs/notebooks/yolov11-object-detection-with-output.rst @@ -141,7 +141,7 @@ Import required utility functions. The lower cell will download the .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg') @@ -207,14 +207,14 @@ Let us consider the examples: .. parsed-literal:: - 100%|██████████| 5.35M/5.35M [00:00<00:00, 24.0MB/s] + 100%|██████████| 5.35M/5.35M [00:00<00:00, 23.2MB/s] .. parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 480x640 2 bicycles, 2 cars, 2 dogs, 79.4ms - Speed: 2.5ms preprocess, 79.4ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 480x640 2 bicycles, 2 cars, 2 dogs, 78.0ms + Speed: 2.3ms preprocess, 78.0ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 640) @@ -243,15 +243,15 @@ preserve dynamic shapes in the model. .. parsed-literal:: - Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.3.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) + Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.4.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) PyTorch: starting from 'yolo11n.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (5.4 MB) - OpenVINO: starting export with openvino 2024.5.0-16913-890f2e12c98... + OpenVINO: starting export with openvino 2024.5.0-16993-9c432a3641a... 
OpenVINO: export success ✅ 1.8s, saved as 'yolo11n_openvino_model/' (5.4 MB) - Export complete (2.0s) - Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization + Export complete (1.9s) + Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization Predict: yolo predict task=detect model=yolo11n_openvino_model imgsz=640 half Validate: yolo val task=detect model=yolo11n_openvino_model imgsz=640 data=/usr/src/ultralytics/ultralytics/cfg/datasets/coco.yaml half Visualize: https://netron.app @@ -326,12 +326,12 @@ ready to check model prediction for object detection. .. parsed-literal:: - Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.3.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) + Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.4.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) Loading yolo11n_openvino_model for OpenVINO inference... Using OpenVINO LATENCY mode for batch=1 inference... - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 640x640 1 bicycle, 2 cars, 1 dog, 20.7ms - Speed: 2.8ms preprocess, 20.7ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 640x640 1 bicycle, 2 cars, 1 dog, 18.7ms + Speed: 2.0ms preprocess, 18.7ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640) @@ -645,8 +645,8 @@ on the image. .. parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 640x640 1 bicycle, 2 cars, 1 dog, 17.5ms - Speed: 1.8ms preprocess, 17.5ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 640x640 1 bicycle, 2 cars, 1 dog, 19.7ms + Speed: 1.8ms preprocess, 19.7ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640) @@ -697,18 +697,18 @@ models. [Step 2/11] Loading OpenVINO Runtime [ WARNING ] Default duration 120 seconds is used for unknown device AUTO [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 18.57 ms + [ INFO ] Read model took 18.73 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [?,3,?,?] @@ -717,14 +717,14 @@ models. 
[Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'x': [1,3,640,640] - [ INFO ] Reshape model took 4.68 ms + [ INFO ] Reshape model took 8.03 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] x (node: x) : u8 / [N,C,H,W] / [1,3,640,640] [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_7) : f32 / [...] / [1,84,8400] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 319.17 ms + [ INFO ] Compile model took 316.96 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -761,17 +761,17 @@ models. [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 120000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 31.01 ms + [ INFO ] First inference took 30.27 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 18528 iterations - [ INFO ] Duration: 120050.83 ms + [ INFO ] Count: 18480 iterations + [ INFO ] Duration: 120044.94 ms [ INFO ] Latency: - [ INFO ] Median: 37.96 ms - [ INFO ] Average: 38.74 ms - [ INFO ] Min: 19.79 ms - [ INFO ] Max: 98.97 ms - [ INFO ] Throughput: 154.33 FPS + [ INFO ] Median: 38.09 ms + [ INFO ] Average: 38.84 ms + [ INFO ] Min: 20.45 ms + [ INFO ] Max: 97.13 ms + [ INFO ] Throughput: 153.94 FPS .. code:: ipython3 @@ -787,18 +787,18 @@ models. [ INFO ] Parsing input parameters [Step 2/11] Loading OpenVINO Runtime [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 26.43 ms + [ INFO ] Read model took 26.48 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [1,3,640,640] @@ -814,7 +814,7 @@ models. [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_7) : f32 / [...] / [1,84,8400] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 515.22 ms + [ INFO ] Compile model took 556.87 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -851,17 +851,17 @@ models. [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 19.14 ms + [ INFO ] First inference took 31.34 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 5184 iterations - [ INFO ] Duration: 15055.81 ms + [ INFO ] Count: 5208 iterations + [ INFO ] Duration: 15031.88 ms [ INFO ] Latency: - [ INFO ] Median: 34.47 ms - [ INFO ] Average: 34.65 ms - [ INFO ] Min: 18.19 ms - [ INFO ] Max: 51.47 ms - [ INFO ] Throughput: 344.32 FPS + [ INFO ] Median: 34.21 ms + [ INFO ] Average: 34.44 ms + [ INFO ] Min: 18.94 ms + [ INFO ] Max: 52.48 ms + [ INFO ] Throughput: 346.46 FPS Next steps diff --git a/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_43_0.png b/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_43_0.png index f6ad963c53f95c..02e6e38bb12a7b 100644 --- a/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_43_0.png +++ b/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_43_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0ed070bb0687ea1e958c91f59b98f38b9834911f23f78c1a8aad55bed3c7cf6d -size 569483 +oid sha256:0904f4ceeae66d4c516656b23389b16d93bc601214a4c9b8d1ee30f7b5366526 +size 572206 diff --git a/docs/notebooks/yolov8-obb-with-output.rst b/docs/notebooks/yolov8-obb-with-output.rst index 87e73f76558ffa..3d9ef3d88c0751 100644 --- a/docs/notebooks/yolov8-obb-with-output.rst +++ b/docs/notebooks/yolov8-obb-with-output.rst @@ -173,6 +173,7 @@ instance. + Run inference ~~~~~~~~~~~~~ diff --git a/docs/notebooks/yolov9-optimization-with-output.rst b/docs/notebooks/yolov9-optimization-with-output.rst index 0cf84003171753..39690ea292721d 100644 --- a/docs/notebooks/yolov9-optimization-with-output.rst +++ b/docs/notebooks/yolov9-optimization-with-output.rst @@ -58,22 +58,15 @@ Guide =2023.3.0" "nncf>=2.8.1" "opencv-python" "seaborn" "pandas" "scikit-learn" "torch" "torchvision" "tqdm" --extra-index-url https://download.pytorch.org/whl/cpu - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "openvino>=2023.3.0" "nncf>=2.8.1" "opencv-python" "matplotlib>=3.4" "seaborn" "pandas" "scikit-learn" "torch" "torchvision" "tqdm" --extra-index-url https://download.pytorch.org/whl/cpu .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. .. code:: ipython3 @@ -100,9 +93,9 @@ Prerequisites Cloning into 'yolov9'... remote: Enumerating objects: 781, done. remote: Total 781 (delta 0), reused 0 (delta 0), pack-reused 781 (from 1) - Receiving objects: 100% (781/781), 3.27 MiB | 10.53 MiB/s, done. - Resolving deltas: 100% (330/330), done. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9 + Receiving objects: 100% (781/781), 3.27 MiB | 19.93 MiB/s, done. + Resolving deltas: 100% (331/331), done. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9 Get PyTorch model @@ -140,7 +133,7 @@ applicable for other models from YOLO V9 family. .. 
parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9/model/gelan-c.pt') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9/model/gelan-c.pt') @@ -195,9 +188,11 @@ using ``ov.save_model``. .. parsed-literal:: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9/models/experimental.py:243: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + ckpt = torch.load(attempt_download(w), map_location='cpu') # load Fusing layers... Model summary: 387 layers, 25288768 parameters, 0 gradients, 102.1 GFLOPs - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9/models/yolo.py:108: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9/models/yolo.py:108: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! elif self.dynamic or self.shape != shape: @@ -578,10 +573,10 @@ asymmetric quantization of activations. .. parsed-literal:: - 2024-10-08 06:55:28.833031: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 06:55:28.867508: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-23 05:32:14.632848: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
+ 2024-10-23 05:32:14.668324: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 06:55:29.471960: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-23 05:32:15.276616: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -669,18 +664,18 @@ models. [ INFO ] Parsing input parameters [Step 2/11] Loading OpenVINO Runtime [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 26.27 ms + [ INFO ] Read model took 26.16 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] images (node: x) : f32 / [...] / [?,3,?,?] @@ -692,7 +687,7 @@ models. [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'images': [1,3,640,640] - [ INFO ] Reshape model took 7.74 ms + [ INFO ] Reshape model took 7.97 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] images (node: x) : u8 / [N,C,H,W] / [1,3,640,640] @@ -702,7 +697,7 @@ models. [ INFO ] xi.3 (node: __module.model.22/aten::cat/Concat_1) : f32 / [...] / [1,144,40,40] [ INFO ] xi (node: __module.model.22/aten::cat/Concat) : f32 / [...] / [1,144,20,20] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 487.17 ms + [ INFO ] Compile model took 475.37 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -739,17 +734,17 @@ models. [ INFO ] Fill input 'images' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 173.19 ms + [ INFO ] First inference took 182.45 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 222 iterations - [ INFO ] Duration: 15646.59 ms + [ INFO ] Count: 228 iterations + [ INFO ] Duration: 15670.39 ms [ INFO ] Latency: - [ INFO ] Median: 412.14 ms - [ INFO ] Average: 418.38 ms - [ INFO ] Min: 285.25 ms - [ INFO ] Max: 798.40 ms - [ INFO ] Throughput: 14.19 FPS + [ INFO ] Median: 412.82 ms + [ INFO ] Average: 410.86 ms + [ INFO ] Min: 309.65 ms + [ INFO ] Max: 431.51 ms + [ INFO ] Throughput: 14.55 FPS .. code:: ipython3 @@ -763,18 +758,18 @@ models. [ INFO ] Parsing input parameters [Step 2/11] Loading OpenVINO Runtime [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 
2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 41.47 ms + [ INFO ] Read model took 41.07 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] images (node: x) : f32 / [...] / [1,3,640,640] @@ -786,7 +781,7 @@ models. [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'images': [1,3,640,640] - [ INFO ] Reshape model took 0.04 ms + [ INFO ] Reshape model took 0.05 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] images (node: x) : u8 / [N,C,H,W] / [1,3,640,640] @@ -796,7 +791,7 @@ models. [ INFO ] xi.3 (node: __module.model.22/aten::cat/Concat_1) : f32 / [...] / [1,144,40,40] [ INFO ] xi (node: __module.model.22/aten::cat/Concat) : f32 / [...] / [1,144,20,20] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 870.18 ms + [ INFO ] Compile model took 943.07 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -833,17 +828,17 @@ models. [ INFO ] Fill input 'images' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 71.43 ms + [ INFO ] First inference took 64.84 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 726 iterations - [ INFO ] Duration: 15150.76 ms + [ INFO ] Count: 720 iterations + [ INFO ] Duration: 15158.98 ms [ INFO ] Latency: - [ INFO ] Median: 121.03 ms - [ INFO ] Average: 124.70 ms - [ INFO ] Min: 65.35 ms - [ INFO ] Max: 268.30 ms - [ INFO ] Throughput: 47.92 FPS + [ INFO ] Median: 119.92 ms + [ INFO ] Average: 125.90 ms + [ INFO ] Min: 80.89 ms + [ INFO ] Max: 277.84 ms + [ INFO ] Throughput: 47.50 FPS Run Live Object Detection diff --git a/docs/notebooks/yolov9-optimization-with-output_files/yolov9-optimization-with-output_36_0.png b/docs/notebooks/yolov9-optimization-with-output_files/yolov9-optimization-with-output_36_0.png index 8d7867fc6e5b5a..d6b48379756722 100644 --- a/docs/notebooks/yolov9-optimization-with-output_files/yolov9-optimization-with-output_36_0.png +++ b/docs/notebooks/yolov9-optimization-with-output_files/yolov9-optimization-with-output_36_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:071b74de5f9e1088f4d187a6c3d339cb90758fe7d12d7b7de495fd5266e50946 -size 499096 +oid sha256:014be25aa9fb5ebd46a7f69afca67c33d8059694ca54bc2e32c0abfa460cb825 +size 496427
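
Throughout these notebooks, ``benchmark_app`` is used to compare the FP32 and INT8
models. As a minimal sketch (the model paths below are placeholders rather than the
exact IR files produced by the notebooks, and ``benchmark_app`` is assumed to be on
``PATH``), the same comparison can be scripted and the reported throughput parsed
from the tool's log:

.. code:: ipython3

    import re
    import subprocess


    def benchmark_throughput(model_xml: str, device: str = "AUTO", seconds: int = 15) -> float:
        """Run benchmark_app on one model and return the throughput (FPS) it reports."""
        result = subprocess.run(
            ["benchmark_app", "-m", model_xml, "-d", device, "-t", str(seconds)],
            capture_output=True,
            text=True,
            check=True,
        )
        match = re.search(r"Throughput:\s+([\d.]+)\s+FPS", result.stdout)
        return float(match.group(1)) if match else float("nan")


    # Placeholder paths: substitute the FP32 and INT8 IRs exported earlier in the notebook.
    fp32_fps = benchmark_throughput("model_fp32/model.xml")
    int8_fps = benchmark_throughput("model_int8/model.xml")
    print(f"INT8 speedup: {int8_fps / fp32_fps:.2f}x")

Parsing the log keeps the scripted comparison consistent with the numbers quoted
above, since both come from the same ``Throughput`` line printed by ``benchmark_app``.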