diff --git a/.github/actions/smart-ci/action.yml b/.github/actions/smart-ci/action.yml index cd111d617ddc1b..4d772c8f0eeb03 100644 --- a/.github/actions/smart-ci/action.yml +++ b/.github/actions/smart-ci/action.yml @@ -30,7 +30,6 @@ inputs: components_config_schema: description: "Path to the schema file for components configuration" required: false - default: ".github/actions/smart-ci/components_schema.yml" labeler_config: description: "Path to labeler configuration file" required: false @@ -101,7 +100,7 @@ runs: -f "${{ inputs.ref_name }}" \ -p "${{ inputs.component_pattern }}" \ -c "${{ inputs.components_config }}" \ - -m "${{ inputs.components_config_schema }}" \ + -m "${{ inputs.components_config_schema || env.DEFAULT_CONFIG_SCHEMA }}" \ -l "${{ inputs.labeler_config }}" \ --enable_for_org "${{ inputs.enable_for_org }}" \ --skip-when-only-listed-labels-set "${{ inputs.skip_when_only_listed_labels_set }}" \ @@ -109,3 +108,4 @@ runs: shell: bash env: GITHUB_TOKEN: ${{ inputs.repo_token }} + DEFAULT_CONFIG_SCHEMA: "${{ github.action_path }}/components_schema.yml" diff --git a/.github/workflows/android_arm64.yml b/.github/workflows/android_arm64.yml index 15094a84ee8f5f..fca16f2848f7bb 100644 --- a/.github/workflows/android_arm64.yml +++ b/.github/workflows/android_arm64.yml @@ -24,7 +24,7 @@ jobs: skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -53,7 +53,7 @@ jobs: images: "${{ steps.handle_docker.outputs.images }}" steps: - name: Checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: ./.github/actions/handle_docker id: handle_docker @@ -98,7 +98,7 @@ jobs: SCCACHE_AZURE_KEY_PREFIX: android_arm64 steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: path: 'openvino' @@ -116,7 +116,7 @@ jobs: popd - name: Clone vcpkg - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: 'microsoft/vcpkg' ref: ${{ env.VCPKG_VERSION }} diff --git a/.github/workflows/android_x64.yml b/.github/workflows/android_x64.yml index cebaa9177b69b9..1cdb2023784979 100644 --- a/.github/workflows/android_x64.yml +++ b/.github/workflows/android_x64.yml @@ -27,7 +27,7 @@ jobs: skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -56,7 +56,7 @@ jobs: images: "${{ steps.handle_docker.outputs.images }}" steps: - name: Checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: ./.github/actions/handle_docker id: handle_docker @@ -97,13 +97,13 @@ jobs: SCCACHE_AZURE_KEY_PREFIX: android_x64 steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: path: 'openvino' 
submodules: 'true' - name: Clone OpenVINO GenAI - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: 'openvinotoolkit/openvino.genai' path: ${{ env.OPENVINO_GENAI_REPO }} @@ -135,6 +135,7 @@ jobs: -DCMAKE_C_COMPILER_LAUNCHER=${{ env.CMAKE_C_COMPILER_LAUNCHER }} \ -DENABLE_LTO=ON \ -DENABLE_PYTHON=OFF \ + -DENABLE_TESTS=ON \ -DOPENVINO_EXTRA_MODULES=${{ env.OPENVINO_GENAI_REPO }} \ -S ${OPENVINO_REPO} \ -B ${BUILD_DIR} diff --git a/.github/workflows/build_doc.yml b/.github/workflows/build_doc.yml index 535be1e4e70457..8c78375e61769c 100644 --- a/.github/workflows/build_doc.yml +++ b/.github/workflows/build_doc.yml @@ -18,7 +18,7 @@ jobs: if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: submodules: 'true' lfs: 'true' @@ -29,7 +29,7 @@ jobs: packages: graphviz texlive liblua5.2-0 libclang1-9 libclang-cpp9 version: 3.0 - - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 + - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 id: cp310 with: python-version: '3.10' @@ -41,7 +41,7 @@ jobs: - name: Install python dependencies run: | python3 -m pip install -r docs/requirements.txt - (cd docs/openvino_sphinx_theme && python3 setup.py install) + (cd docs/openvino_sphinx_theme && python3 -m pip install .) python3 -m pip install docs/openvino_custom_sphinx_sitemap - name: Download and install doxygen @@ -63,7 +63,7 @@ jobs: - name: Cache documentation id: cache_sphinx_docs - uses: actions/cache@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1 + uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 with: path: build/docs/_build/.doctrees key: sphinx-docs-cache diff --git a/.github/workflows/check_pr_commits.yml b/.github/workflows/check_pr_commits.yml index 5710736f322652..f7f66be299876c 100644 --- a/.github/workflows/check_pr_commits.yml +++ b/.github/workflows/check_pr_commits.yml @@ -9,7 +9,7 @@ jobs: if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Install dependencies run: python3 -m pip install -r ./.github/github_org_control/requirements.txt diff --git a/.github/workflows/cleanup_caches.yml b/.github/workflows/cleanup_caches.yml index 53a426bfa32b42..3fc69b21374093 100644 --- a/.github/workflows/cleanup_caches.yml +++ b/.github/workflows/cleanup_caches.yml @@ -48,7 +48,7 @@ jobs: steps: - name: Checkout cach action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/cache @@ -70,7 +70,7 @@ jobs: steps: - name: Checkout cach action - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/cache diff --git a/.github/workflows/code_snippets.yml b/.github/workflows/code_snippets.yml index 5384ece83fb207..9337fdff4b2905 100644 --- a/.github/workflows/code_snippets.yml +++ b/.github/workflows/code_snippets.yml @@ -28,7 +28,7 @@ jobs: if: ${{ github.repository_owner == 
'openvinotoolkit' }} steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: submodules: 'true' diff --git a/.github/workflows/code_style.yml b/.github/workflows/code_style.yml index 777ba7694d0be5..a70d2641cb57f3 100644 --- a/.github/workflows/code_style.yml +++ b/.github/workflows/code_style.yml @@ -14,7 +14,7 @@ jobs: permissions: pull-requests: write steps: - - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: submodules: 'true' @@ -44,7 +44,7 @@ jobs: permissions: pull-requests: write steps: - - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: submodules: 'true' @@ -76,7 +76,7 @@ jobs: runs-on: ubuntu-22.04 if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: submodules: 'true' diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 3aecbe2367da05..db5ba3de1a3c85 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -20,7 +20,7 @@ jobs: steps: - name: Setup python - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: '3.10.10' architecture: 'x64' @@ -32,7 +32,7 @@ jobs: max-size: 50G - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: submodules: 'true' diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index 8a2338554faae3..d87de4257e0270 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -51,14 +51,14 @@ jobs: apt-get install --assume-yes --no-install-recommends git ca-certificates - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: path: ${{ env.OPENVINO_REPO }} submodules: 'true' ref: ${{ inputs.openvinoRef }} - name: Clone OpenVINO Contrib - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: 'openvinotoolkit/openvino_contrib' path: ${{ env.OPENVINO_CONTRIB_REPO }} diff --git a/.github/workflows/debian_10_arm.yml b/.github/workflows/debian_10_arm.yml index f4db0a83a5a39f..73426222253adb 100644 --- a/.github/workflows/debian_10_arm.yml +++ b/.github/workflows/debian_10_arm.yml @@ -24,7 +24,7 @@ jobs: skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -58,7 +58,7 @@ jobs: images: "${{ steps.handle_docker.outputs.images }}" steps: - name: Checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: ./.github/actions/handle_docker id: 
handle_docker diff --git a/.github/workflows/dependency_review.yml b/.github/workflows/dependency_review.yml index 3dcd9a367b018c..59a1eaa6e1c26f 100644 --- a/.github/workflows/dependency_review.yml +++ b/.github/workflows/dependency_review.yml @@ -9,7 +9,7 @@ jobs: if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Dependency Review uses: actions/dependency-review-action@72eb03d02c7872a771aacd928f3123ac62ad6d3a # v4.3.3 diff --git a/.github/workflows/dev_cpu_linux_snippets_libxsmm.yml b/.github/workflows/dev_cpu_linux_snippets_libxsmm.yml index 26e8400c22a04f..2f6d646f8e271d 100644 --- a/.github/workflows/dev_cpu_linux_snippets_libxsmm.yml +++ b/.github/workflows/dev_cpu_linux_snippets_libxsmm.yml @@ -32,7 +32,7 @@ jobs: skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -65,7 +65,7 @@ jobs: images: "${{ steps.handle_docker.outputs.images }}" steps: - name: Checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: ./.github/actions/handle_docker id: handle_docker @@ -109,7 +109,7 @@ jobs: steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: path: ${{ env.OPENVINO_REPO }} submodules: 'true' @@ -295,7 +295,7 @@ jobs: popd - name: Fetch setup_python action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python/action.yml diff --git a/.github/workflows/fedora_29.yml b/.github/workflows/fedora_29.yml index 0ec0c409d12e0b..3f685502747a19 100644 --- a/.github/workflows/fedora_29.yml +++ b/.github/workflows/fedora_29.yml @@ -24,7 +24,7 @@ jobs: skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -58,7 +58,7 @@ jobs: images: "${{ steps.handle_docker.outputs.images }}" steps: - name: Checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: ./.github/actions/handle_docker id: handle_docker diff --git a/.github/workflows/files_size.yml b/.github/workflows/files_size.yml index 3733ad48ca49d2..2768e731b6578b 100644 --- a/.github/workflows/files_size.yml +++ b/.github/workflows/files_size.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-22.04 if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: git ls-tree run: git ls-tree -r -t -l --full-name HEAD | sort -n -r -k 4 diff --git a/.github/workflows/job_build_linux.yml b/.github/workflows/job_build_linux.yml index 
86545b6e9e7a43..e491388a4b3606 100644 --- a/.github/workflows/job_build_linux.yml +++ b/.github/workflows/job_build_linux.yml @@ -91,7 +91,7 @@ jobs: PRODUCT_TYPE: public_linux_${{ inputs.os }}_${{ inputs.arch }}_release steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: path: ${{ env.OPENVINO_REPO }} submodules: 'true' @@ -106,7 +106,7 @@ jobs: git rev-parse HEAD - name: Clone OpenVINO Contrib - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: 'openvinotoolkit/openvino_contrib' path: ${{ env.OPENVINO_CONTRIB_REPO }} @@ -331,7 +331,7 @@ jobs: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz ${{ env.BUILD_DIR }}/deb ${{ env.MANIFEST_PATH }} - ${{ env.STORE_WHEELS == 'true' && format('{0}/wheels', env.BUILD_DIR) || '' }} + ${{ env.STORE_WHEELS == 'true' && format('{0}/wheels', env.INSTALL_WHEELS_DIR) || '' }} storage_dir: ${{ env.PRODUCT_TYPE }} storage_root: ${{ env.ARTIFACTS_SHARE }} env: diff --git a/.github/workflows/job_build_windows.yml b/.github/workflows/job_build_windows.yml index df2544d9d9e60a..0567109e0dea46 100644 --- a/.github/workflows/job_build_windows.yml +++ b/.github/workflows/job_build_windows.yml @@ -17,6 +17,10 @@ on: description: 'Target branch for the build' type: string required: true + cmake-options: + description: 'A string of options passed to CMake' + type: string + required: true permissions: read-all @@ -33,7 +37,6 @@ jobs: runs-on: ${{ inputs.runner }} env: CMAKE_BUILD_TYPE: ${{ inputs.build-type }} - CMAKE_GENERATOR: 'Ninja Multi-Config' CMAKE_CXX_COMPILER_LAUNCHER: ccache CMAKE_C_COMPILER_LAUNCHER: ccache CCACHE_REMOTE_DIR: C:\\mount\\caches\\ccache\\windows2022_x86_64_${{ inputs.build-type }}\\${{ inputs.target-branch }} @@ -51,13 +54,13 @@ jobs: PRODUCT_TYPE: 'public_windows_vs2019_${{ inputs.build-type }}' steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: path: 'openvino' submodules: 'true' - name: Clone OpenVINO Contrib - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: 'openvinotoolkit/openvino_contrib' path: 'openvino_contrib' @@ -155,25 +158,13 @@ jobs: - name: CMake configure run: | - cmake -G "${{ env.CMAKE_GENERATOR }}" ` - -DENABLE_CPPLINT=OFF ` - -DBUILD_SHARED_LIBS=ON ` - -DENABLE_TESTS=ON ` - -DCMAKE_COMPILE_WARNING_AS_ERROR=ON ` - -DENABLE_STRICT_DEPENDENCIES=OFF ` - -DENABLE_PYTHON=ON ` - -DCMAKE_DISABLE_FIND_PACKAGE_PkgConfig=ON ` - -DCMAKE_BUILD_TYPE=${{ env.CMAKE_BUILD_TYPE }} ` - -DCUSTOM_OPERATIONS="calculate_grid;complex_mul;fft;grid_sample;sparse_conv;sparse_conv_transpose" ` - -DOPENVINO_EXTRA_MODULES="${{ env.OPENVINO_CONTRIB_REPO }}/modules/custom_operations;${{ env.OPENVINO_CONTRIB_REPO }}/modules/java_api" ` - -S ${{ env.OPENVINO_REPO }} ` - -B ${{ env.BUILD_DIR }} + cmake -S ${{ env.OPENVINO_REPO }} -B ${{ env.BUILD_DIR }} ${{ inputs.cmake-options }} - name: Clean ccache stats run: '& ccache --zero-stats' - name: Cmake build - OpenVINO - run: cmake --build ${{ env.BUILD_DIR }} --parallel --verbose + run: cmake --build ${{ env.BUILD_DIR }} --config ${{ env.CMAKE_BUILD_TYPE }} --parallel --verbose - name: Show ccache stats run: '& ccache 
--show-stats' @@ -225,6 +216,7 @@ jobs: if-no-files-found: 'error' - name: Upload openvino wheels + if: ${{ inputs.build-type != 'Debug' }} uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_wheels @@ -254,7 +246,9 @@ jobs: artifacts: | ${{ env.BUILD_DIR }}/openvino_package.zip ${{ env.BUILD_DIR }}/openvino_tests.zip - ${{ env.INSTALL_WHEELS_DIR }}/wheels ${{ env.MANIFEST_PATH }} + ${{ env.STORE_WHEELS == 'true' && format('{0}/wheels', env.INSTALL_WHEELS_DIR) || '' }} storage_dir: ${{ env.PRODUCT_TYPE }} storage_root: ${{ env.ARTIFACTS_SHARE }} + env: + STORE_WHEELS: ${{ inputs.build-type != 'Debug' }} diff --git a/.github/workflows/job_cpu_functional_tests.yml b/.github/workflows/job_cpu_functional_tests.yml index e197d581f290a4..0366ec47ff437e 100644 --- a/.github/workflows/job_cpu_functional_tests.yml +++ b/.github/workflows/job_cpu_functional_tests.yml @@ -71,7 +71,7 @@ jobs: popd - name: Fetch setup_python action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python/action.yml @@ -89,7 +89,7 @@ jobs: run: python3 -m pip install -r ${INSTALL_TEST_DIR}/functional_test_utils/layer_tests_summary/requirements.txt - name: Restore tests execution time - uses: actions/cache/restore@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1 + uses: actions/cache/restore@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 with: path: ${{ env.PARALLEL_TEST_CACHE }} key: ${{ runner.os }}-${{ runner.arch }}-tests-functional-cpu-stamp-${{ github.sha }} @@ -109,7 +109,7 @@ jobs: timeout-minutes: 25 - name: Save tests execution time - uses: actions/cache/save@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1 + uses: actions/cache/save@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 if: github.ref_name == 'master' with: path: ${{ env.PARALLEL_TEST_CACHE }} diff --git a/.github/workflows/job_cxx_unit_tests.yml b/.github/workflows/job_cxx_unit_tests.yml index 3f871151ccd282..8fab17043b7465 100644 --- a/.github/workflows/job_cxx_unit_tests.yml +++ b/.github/workflows/job_cxx_unit_tests.yml @@ -20,13 +20,23 @@ on: description: 'OS that is used for testing in the form of "ubuntu_20_04"' type: string required: true + build-type: + description: 'OpenVINO build type, e.g., "Release"' + type: string + required: false + default: 'Release' + timeout-minutes: + description: 'Timeout in minutes for the job' + type: number + required: false + default: 35 permissions: read-all jobs: CXX_Unit_Tests: name: C++ unit tests - timeout-minutes: ${{ contains(inputs.runner, 'win') && 50 || 35 }} + timeout-minutes: ${{ inputs.timeout-minutes }} runs-on: ${{ inputs.runner }} container: image: ${{ inputs.image }} @@ -165,7 +175,6 @@ jobs: if: ${{ fromJSON(inputs.affected-components).transformations.test && runner.arch != 'ARM64' }} # Ticket: 126281 run: | ${{ env.SETUPVARS_COMMAND }} - ${{ env.INSTALL_TEST_DIR }}/ov_transformations_tests --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-Transformations.xml - name: Common test utils tests @@ -208,8 +217,9 @@ jobs: ${{ env.SETUPVARS_COMMAND }} ${{ env.INSTALL_TEST_DIR }}/ov_auto_func_tests --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-ov_auto_func_tests.xml + # Disabled for debug build due to long execution time - name: Template plugin func tests - if: fromJSON(inputs.affected-components).TEMPLATE.test + if: ${{ 
fromJSON(inputs.affected-components).TEMPLATE.test && inputs.build-type != 'debug' }} run: | ${{ env.SETUPVARS_COMMAND }} ${{ env.INSTALL_TEST_DIR }}/ov_template_func_tests --gtest_print_time=1 --gtest_filter=*smoke* --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-TemplateFuncTests.xml @@ -231,9 +241,10 @@ jobs: run: | ${{ env.SETUPVARS_COMMAND }} ${{ env.INSTALL_TEST_DIR }}/ov_auto_batch_unit_tests --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-ov_auto_batch_unit_tests.xml - + + # Disabled for debug build due to long execution time - name: AutoBatch func tests - if: fromJSON(inputs.affected-components).AUTO_BATCH.test + if: ${{ fromJSON(inputs.affected-components).AUTO_BATCH.test && inputs.build-type != 'debug' }} run: | ${{ env.SETUPVARS_COMMAND }} ${{ env.INSTALL_TEST_DIR }}/ov_auto_batch_func_tests --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-ov_auto_batch_func_tests.xml --gtest_filter="*smoke*" @@ -250,8 +261,9 @@ jobs: ${{ env.SETUPVARS_COMMAND }} ${{ env.INSTALL_TEST_DIR }}/ov_hetero_unit_tests --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-OVHeteroUnitTests.xml + # Disabled for debug build due to long execution time - name: Hetero func tests - if: ${{ fromJSON(inputs.affected-components).HETERO.test && inputs.os != 'debian_10' }} # Ticket: 153170 + if: ${{ fromJSON(inputs.affected-components).HETERO.test && inputs.os != 'debian_10' && inputs.build-type != 'debug' }} # Ticket: 153170 run: | ${{ env.SETUPVARS_COMMAND }} ${{ env.INSTALL_TEST_DIR }}/ov_hetero_func_tests --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-OVHeteroFuncTests.xml --gtest_filter="*smoke*" diff --git a/.github/workflows/job_gpu_tests.yml b/.github/workflows/job_gpu_tests.yml index b9862eac09cc05..195abbbd5fb0f9 100644 --- a/.github/workflows/job_gpu_tests.yml +++ b/.github/workflows/job_gpu_tests.yml @@ -74,7 +74,7 @@ jobs: DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input TZ: "Europe/London" # to prevent tzdata from waiting user input - name: Setup Python ${{ env.PYTHON_VERSION }} - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: ${{ env.PYTHON_VERSION }} diff --git a/.github/workflows/job_jax_models_tests.yml b/.github/workflows/job_jax_models_tests.yml index ea2669071386dd..43fa8f2a7f1740 100644 --- a/.github/workflows/job_jax_models_tests.yml +++ b/.github/workflows/job_jax_models_tests.yml @@ -64,7 +64,7 @@ jobs: working-directory: ${{ env.INSTALL_DIR }} - name: Fetch setup_python action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python/action.yml diff --git a/.github/workflows/job_onnx_runtime.yml b/.github/workflows/job_onnx_runtime.yml index 0ceb080d82184d..966d258a2fc609 100644 --- a/.github/workflows/job_onnx_runtime.yml +++ b/.github/workflows/job_onnx_runtime.yml @@ -63,7 +63,7 @@ jobs: popd - name: Fetch ONNX runtime version and skip tests list - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | src/frontends/onnx/tests/ci_utils/onnxruntime @@ -77,7 +77,7 @@ jobs: working-directory: ${{ env.ONNX_RUNTIME_UTILS }} - name: Clone ONNX Runtime - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # 
v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: 'microsoft/onnxruntime' path: ${{ env.ONNX_RUNTIME_REPO }} diff --git a/.github/workflows/job_openvino_js.yml b/.github/workflows/job_openvino_js.yml index e722af78832c12..ecb278fdb54ca3 100644 --- a/.github/workflows/job_openvino_js.yml +++ b/.github/workflows/job_openvino_js.yml @@ -32,7 +32,7 @@ jobs: DISPLAY: ':99' steps: - name: Fetch OpenVINO JS sources - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | src/bindings/js @@ -52,7 +52,7 @@ jobs: - name: Setup Node ${{ env.NODE_VERSION }} if: runner.os != 'Linux' # Node is already installed in the Docker image - uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4 + uses: actions/setup-node@39370e3970a6d050c480ffad4ff0ed4d3fdee5af # v4.1.0 with: node-version: ${{ env.NODE_VERSION }} diff --git a/.github/workflows/job_python_unit_tests.yml b/.github/workflows/job_python_unit_tests.yml index 1fafafd7623545..1dd6ebbfc204d8 100644 --- a/.github/workflows/job_python_unit_tests.yml +++ b/.github/workflows/job_python_unit_tests.yml @@ -76,7 +76,7 @@ jobs: working-directory: ${{ env.INSTALL_DIR }} - name: Fetch setup_python action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python/action.yml @@ -248,7 +248,7 @@ jobs: - name: Clone API snippets if: runner.os != 'macOS' - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: docs/articles_en/assets/snippets path: ${{ env.OPENVINO_REPO }} diff --git a/.github/workflows/job_pytorch_layer_tests.yml b/.github/workflows/job_pytorch_layer_tests.yml index c6cd97422f2b95..4ea6d33336fc87 100644 --- a/.github/workflows/job_pytorch_layer_tests.yml +++ b/.github/workflows/job_pytorch_layer_tests.yml @@ -83,7 +83,7 @@ jobs: working-directory: ${{ env.INSTALL_DIR }} - name: Fetch setup_python action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python/action.yml diff --git a/.github/workflows/job_pytorch_models_tests.yml b/.github/workflows/job_pytorch_models_tests.yml index 22a09dffba779f..2959728e39dee7 100644 --- a/.github/workflows/job_pytorch_models_tests.yml +++ b/.github/workflows/job_pytorch_models_tests.yml @@ -22,7 +22,7 @@ permissions: read-all jobs: PyTorch_Models_Tests: name: PyTorch Models tests - timeout-minutes: ${{ inputs.model_scope == 'precommit' && 35 || 400 }} + timeout-minutes: ${{ inputs.model_scope == 'precommit' && 40 || 400 }} runs-on: ${{ inputs.runner }} container: ${{ fromJSON(inputs.container) }} defaults: @@ -77,7 +77,7 @@ jobs: working-directory: ${{ env.INSTALL_DIR }} - name: Fetch setup_python action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python/action.yml @@ -105,8 +105,7 @@ jobs: python3 -m pip install ./openvino_tokenizers-* working-directory: ${{ env.INSTALL_WHEELS_DIR }} - - name: Install PyTorch tests requirements for precommit - if: ${{ inputs.model_scope == 
'precommit' }} + - name: Install PyTorch tests requirements run: | python3 -m pip install -r ${INSTALL_TEST_DIR}/requirements_pytorch env: @@ -116,7 +115,7 @@ jobs: if: ${{ inputs.model_scope == 'precommit' || inputs.model_scope == 'nightly_scope1' }} run: | export PYTHONPATH=${MODEL_HUB_TESTS_INSTALL_DIR}:$PYTHONPATH - python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/pytorch/ -m ${TYPE} --html=${INSTALL_TEST_DIR}/TEST-torch_model_timm_tv_${{ inputs.model_scope }}_tests.html --self-contained-html -v -n 2 -k "TestTimmConvertModel or TestTorchHubConvertModel or TestEdsrConvertModel" + python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/pytorch/ -m ${TYPE} --html=${INSTALL_TEST_DIR}/TEST-torch_model_timm_tv_${{ inputs.model_scope }}_tests.html --self-contained-html -v -n 4 -k "TestTimmConvertModel or TestTorchHubConvertModel or TestEdsrConvertModel" env: TYPE: ${{ inputs.model_scope == 'precommit' && 'precommit' || 'nightly' }} TEST_DEVICE: CPU diff --git a/.github/workflows/job_samples_tests.yml b/.github/workflows/job_samples_tests.yml index 7cde4e6fd18eae..e144aa0cfb95aa 100644 --- a/.github/workflows/job_samples_tests.yml +++ b/.github/workflows/job_samples_tests.yml @@ -68,7 +68,7 @@ jobs: - name: Fetch setup_python action # Python is already installed on Ubuntu within Dockerfile if: runner.os != 'Linux' - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python/action.yml diff --git a/.github/workflows/job_tensorflow_layer_tests.yml b/.github/workflows/job_tensorflow_layer_tests.yml index 977b2e4f96af73..26730f9b55df7a 100644 --- a/.github/workflows/job_tensorflow_layer_tests.yml +++ b/.github/workflows/job_tensorflow_layer_tests.yml @@ -83,7 +83,7 @@ jobs: working-directory: ${{ env.INSTALL_DIR }} - name: Fetch setup_python action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python/action.yml diff --git a/.github/workflows/job_tensorflow_models_tests.yml b/.github/workflows/job_tensorflow_models_tests.yml index 0990eae3de6e7e..5321beb8703de1 100644 --- a/.github/workflows/job_tensorflow_models_tests.yml +++ b/.github/workflows/job_tensorflow_models_tests.yml @@ -69,7 +69,7 @@ jobs: working-directory: ${{ env.INSTALL_DIR }} - name: Fetch setup_python action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python/action.yml diff --git a/.github/workflows/job_tokenizers.yml b/.github/workflows/job_tokenizers.yml index f7388eb98a2f3c..5c5e59aa3bec97 100644 --- a/.github/workflows/job_tokenizers.yml +++ b/.github/workflows/job_tokenizers.yml @@ -20,12 +20,15 @@ on: description: 'Components that are affected by changes in the commit defined by the Smart CI Action' type: string required: true + python-version: + description: 'Python version to setup. 
E.g., "3.11"' + type: string + required: true permissions: read-all env: PIP_CACHE_PATH: /mount/caches/pip/linux - PYTHON_VERSION: '3.11' TARGET_BRANCH: ${{ github.base_ref || github.event.merge_group.base_ref || github.ref }} jobs: @@ -52,7 +55,7 @@ jobs: echo "EXTENSION_BUILD_DIR=$GITHUB_WORKSPACE/build" >> "$GITHUB_ENV" - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python @@ -63,16 +66,16 @@ jobs: if: runner.os == 'macOS' run: brew install pigz - - name: Setup Python ${{ env.PYTHON_VERSION }} + - name: Setup Python ${{ inputs.python-version }} uses: ./.github/actions/setup_python with: - version: ${{ env.PYTHON_VERSION }} + version: ${{ inputs.python-version }} pip-cache-path: ${{ runner.os == 'Linux' && env.PIP_CACHE_PATH || '' }} should-setup-pip-paths: ${{ runner.os == 'Linux' }} self-hosted-runner: ${{ runner.os == 'Linux' }} - name: Clone OpenVINO Tokenizers - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: 'openvinotoolkit/openvino_tokenizers' path: ${{ env.OPENVINO_TOKENIZERS_REPO }} diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml index 4c631b673f8cc2..00f3a321e0dd1f 100644 --- a/.github/workflows/labeler.yml +++ b/.github/workflows/labeler.yml @@ -26,7 +26,7 @@ jobs: if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - name: Checkout Labeller Script - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: '.github' diff --git a/.github/workflows/linux_arm64.yml b/.github/workflows/linux_arm64.yml index 0345d5259e8182..0af30621a2a7fd 100644 --- a/.github/workflows/linux_arm64.yml +++ b/.github/workflows/linux_arm64.yml @@ -28,7 +28,7 @@ jobs: skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -62,7 +62,7 @@ jobs: images: "${{ steps.handle_docker.outputs.images }}" steps: - name: Checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: ./.github/actions/handle_docker id: handle_docker @@ -146,6 +146,7 @@ jobs: shell: bash container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_20_04_arm64 }}", "volumes": ["/mount:/mount"]}' affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' if: fromJSON(needs.smart_ci.outputs.affected_components).TOKENIZERS CXX_Unit_Tests: diff --git a/.github/workflows/linux_conditional_compilation.yml b/.github/workflows/linux_conditional_compilation.yml index 42d7810b9f1663..acb7e1271d7a34 100644 --- a/.github/workflows/linux_conditional_compilation.yml +++ b/.github/workflows/linux_conditional_compilation.yml @@ -29,7 +29,7 @@ jobs: skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: 
.github/actions/smart-ci @@ -63,7 +63,7 @@ jobs: images: "${{ steps.handle_docker.outputs.images }}" steps: - name: Checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: ./.github/actions/handle_docker id: handle_docker @@ -109,13 +109,13 @@ jobs: steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: path: ${{ env.OPENVINO_REPO }} submodules: 'true' - name: Clone test models - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: 'openvinotoolkit/testdata' path: ${{ env.MODELS_PATH }} @@ -282,13 +282,13 @@ jobs: steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: path: ${{ env.OPENVINO_REPO }} submodules: 'true' - name: Clone test models - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: 'openvinotoolkit/testdata' path: ${{ env.MODELS_PATH }} diff --git a/.github/workflows/linux_riscv.yml b/.github/workflows/linux_riscv.yml index f67b011b0caed5..c450a5d30768e4 100644 --- a/.github/workflows/linux_riscv.yml +++ b/.github/workflows/linux_riscv.yml @@ -28,7 +28,7 @@ jobs: target_branch: ${{ steps.set_target_branch.outputs.target_branch }} steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -63,7 +63,7 @@ jobs: images: "${{ steps.handle_docker.outputs.images }}" steps: - name: Checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: ./.github/actions/handle_docker id: handle_docker @@ -102,7 +102,7 @@ jobs: if: ${{ github.event_name != 'merge_group' }} steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: path: 'openvino' diff --git a/.github/workflows/linux_sanitizers.yml b/.github/workflows/linux_sanitizers.yml index e098b637150834..4bb597d83fadc8 100644 --- a/.github/workflows/linux_sanitizers.yml +++ b/.github/workflows/linux_sanitizers.yml @@ -24,7 +24,7 @@ jobs: skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -52,7 +52,7 @@ jobs: images: "${{ steps.handle_docker.outputs.images }}" steps: - name: Checkout - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: ./.github/actions/handle_docker id: handle_docker @@ -107,13 +107,13 @@ jobs: steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: path: ${{ 
env.OPENVINO_REPO }} submodules: 'true' - name: Clone OpenVINO Contrib - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: 'openvinotoolkit/openvino_contrib' path: ${{ env.OPENVINO_CONTRIB_REPO }} @@ -280,7 +280,7 @@ jobs: popd - name: Fetch Sanitizer Suppression Lists - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | tests/sanitizers/lsan/suppressions.txt diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index bddbaab134fa9c..5492ad40aa17b4 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -42,7 +42,7 @@ jobs: skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -82,13 +82,13 @@ jobs: if: "!needs.smart_ci.outputs.skip_workflow" steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: path: 'openvino' submodules: 'true' - name: Clone OpenVINO Contrib - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: 'openvinotoolkit/openvino_contrib' path: 'openvino_contrib' @@ -258,6 +258,7 @@ jobs: runner: 'macos-13' shell: bash affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' if: fromJSON(needs.smart_ci.outputs.affected_components).TOKENIZERS CXX_Unit_Tests: diff --git a/.github/workflows/mac_arm64.yml b/.github/workflows/mac_arm64.yml index 576eefde8c9b4a..8100b74734ab17 100644 --- a/.github/workflows/mac_arm64.yml +++ b/.github/workflows/mac_arm64.yml @@ -42,7 +42,7 @@ jobs: skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -82,13 +82,13 @@ jobs: if: "!needs.smart_ci.outputs.skip_workflow" steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: path: 'openvino' submodules: 'true' - name: Clone OpenVINO Contrib - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: 'openvinotoolkit/openvino_contrib' path: 'openvino_contrib' @@ -258,6 +258,7 @@ jobs: runner: 'macos-13-xlarge' shell: bash affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' if: fromJSON(needs.smart_ci.outputs.affected_components).TOKENIZERS CXX_Unit_Tests: diff --git a/.github/workflows/mo.yml b/.github/workflows/mo.yml index 151227f111c9e0..f48986d4a0d304 100644 --- a/.github/workflows/mo.yml +++ b/.github/workflows/mo.yml @@ -24,15 +24,15 @@ jobs: if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: 
actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Setup Python - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: '3.10' - name: Cache pip - uses: actions/cache@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1 + uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 with: path: ~/.cache/pip key: ${{ runner.os }}-pip-${{ hashFiles('tools/mo/requirements*.txt') }} diff --git a/.github/workflows/ovc.yml b/.github/workflows/ovc.yml index ee5f3e58e363e6..4d69563a741d3a 100644 --- a/.github/workflows/ovc.yml +++ b/.github/workflows/ovc.yml @@ -19,15 +19,15 @@ jobs: if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Setup Python - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: '3.10' - name: Cache pip - uses: actions/cache@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1 + uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 with: path: ~/.cache/pip key: ${{ runner.os }}-pip-${{ hashFiles('src/bindings/python/requirements*.txt') }} diff --git a/.github/workflows/py_checks.yml b/.github/workflows/py_checks.yml index 2b0d3f2272787f..caed37eee89056 100644 --- a/.github/workflows/py_checks.yml +++ b/.github/workflows/py_checks.yml @@ -28,10 +28,10 @@ jobs: if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Setup Python - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: '3.9' diff --git a/.github/workflows/send_workflows_to_opentelemetry.yml b/.github/workflows/send_workflows_to_opentelemetry.yml index 687b79d2606bec..ba38d6a9f90fed 100644 --- a/.github/workflows/send_workflows_to_opentelemetry.yml +++ b/.github/workflows/send_workflows_to_opentelemetry.yml @@ -41,7 +41,7 @@ jobs: steps: - name: Checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: '.github' diff --git a/.github/workflows/ubuntu_20.yml b/.github/workflows/ubuntu_20.yml index 6056c20945c801..63a1fab87d566f 100644 --- a/.github/workflows/ubuntu_20.yml +++ b/.github/workflows/ubuntu_20.yml @@ -30,7 +30,7 @@ jobs: skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -64,7 +64,7 @@ jobs: images: "${{ steps.handle_docker.outputs.images }}" steps: - name: Checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: ./.github/actions/handle_docker id: handle_docker diff --git a/.github/workflows/ubuntu_22.yml b/.github/workflows/ubuntu_22.yml index 
5e5ac3c3482624..92178fce7f5054 100644 --- a/.github/workflows/ubuntu_22.yml +++ b/.github/workflows/ubuntu_22.yml @@ -32,7 +32,7 @@ jobs: skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -66,7 +66,7 @@ jobs: images: "${{ steps.handle_docker.outputs.images }}" steps: - name: Checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: ./.github/actions/handle_docker id: handle_docker @@ -183,7 +183,7 @@ jobs: popd - name: Fetch setup_python action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python/action.yml @@ -459,7 +459,7 @@ jobs: popd - name: Clone OpenVINO Contrib - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: 'openvinotoolkit/openvino_contrib' path: ${{ env.OPENVINO_CONTRIB_REPO }} @@ -491,6 +491,7 @@ jobs: shell: bash container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_22_04_x64 }}", "volumes": ["/mount:/mount"]}' affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' if: fromJSON(needs.smart_ci.outputs.affected_components).TOKENIZERS iGPU: diff --git a/.github/workflows/ubuntu_22_dpcpp.yml b/.github/workflows/ubuntu_22_dpcpp.yml index 9ca27262a5dcde..48230155f7e903 100644 --- a/.github/workflows/ubuntu_22_dpcpp.yml +++ b/.github/workflows/ubuntu_22_dpcpp.yml @@ -20,7 +20,7 @@ jobs: skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -54,7 +54,7 @@ jobs: images: "${{ steps.handle_docker.outputs.images }}" steps: - name: Checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: ./.github/actions/handle_docker id: handle_docker diff --git a/.github/workflows/ubuntu_24.yml b/.github/workflows/ubuntu_24.yml index bb147450438160..d874e06a189232 100644 --- a/.github/workflows/ubuntu_24.yml +++ b/.github/workflows/ubuntu_24.yml @@ -27,7 +27,7 @@ jobs: skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -61,7 +61,7 @@ jobs: images: "${{ steps.handle_docker.outputs.images }}" steps: - name: Checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: ./.github/actions/handle_docker id: handle_docker @@ -144,9 +144,31 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.12' + TensorFlow_Layer_Tests: + name: TensorFlow Layer Tests + needs: [ Docker, Build, Smart_CI, Openvino_tokenizers 
] + uses: ./.github/workflows/job_tensorflow_layer_tests.yml + with: + runner: 'aks-linux-4-cores-16gb' + container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_24_04_x64 }}", "volumes": ["/mount:/mount"]}' + affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.12' + + Openvino_tokenizers: + name: OpenVINO tokenizers extension + needs: [ Build, Smart_CI, Docker ] + uses: ./.github/workflows/job_tokenizers.yml + with: + runner: 'aks-linux-4-cores-16gb' + shell: bash + container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_24_04_x64 }}", "volumes": ["/mount:/mount"]}' + affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.12' + if: fromJSON(needs.smart_ci.outputs.affected_components).TOKENIZERS + Overall_Status: name: ci/gha_overall_status_ubuntu_24 - needs: [Smart_CI, Build, Debian_Packages, Samples, Python_Unit_Tests] + needs: [Smart_CI, Build, Debian_Packages, Samples, Python_Unit_Tests, Pytorch_Layer_Tests, TensorFlow_Layer_Tests, Openvino_tokenizers] if: ${{ always() }} runs-on: ubuntu-latest steps: diff --git a/.github/workflows/webassembly.yml b/.github/workflows/webassembly.yml index c4d835637352ad..45d6c9ce98317a 100644 --- a/.github/workflows/webassembly.yml +++ b/.github/workflows/webassembly.yml @@ -24,7 +24,7 @@ jobs: skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -58,7 +58,7 @@ jobs: images: "${{ steps.handle_docker.outputs.images }}" steps: - name: Checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: ./.github/actions/handle_docker id: handle_docker @@ -91,7 +91,7 @@ jobs: SCCACHE_AZURE_KEY_PREFIX: webassembly_Release steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: path: 'openvino' submodules: 'true' diff --git a/.github/workflows/windows_conditional_compilation.yml b/.github/workflows/windows_conditional_compilation.yml index 30b2ce2f20df38..d93c737606a2b4 100644 --- a/.github/workflows/windows_conditional_compilation.yml +++ b/.github/workflows/windows_conditional_compilation.yml @@ -30,7 +30,7 @@ jobs: skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -73,13 +73,13 @@ jobs: steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: path: 'openvino' submodules: 'true' - name: Clone test models - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: 'openvinotoolkit/testdata' path: 'testdata' @@ -282,13 +282,13 @@ jobs: steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 
# v4.2.2 with: path: 'openvino' submodules: 'true' - name: Clone test models - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: 'openvinotoolkit/testdata' path: 'testdata' @@ -369,7 +369,7 @@ jobs: run: Expand-Archive ${{ env.INSTALL_TEST_DIR }}/openvino_tests.zip -DestinationPath "${{ env.INSTALL_TEST_DIR }}" - name: Fetch setup_python action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python/action.yml @@ -387,7 +387,7 @@ jobs: run: python3 -m pip install -r ${{ env.INSTALL_TEST_DIR }}/layer_tests_summary/requirements.txt - name: Restore tests execution time - uses: actions/cache/restore@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1 + uses: actions/cache/restore@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 with: path: ${{ env.PARALLEL_TEST_CACHE }} key: ${{ runner.os }}-tests-functional-cpu-stamp-${{ github.sha }} diff --git a/.github/workflows/windows_vs2019_debug.yml b/.github/workflows/windows_vs2019_debug.yml index f36c3f33031f7d..68a99055f5bdb8 100644 --- a/.github/workflows/windows_vs2019_debug.yml +++ b/.github/workflows/windows_vs2019_debug.yml @@ -2,6 +2,7 @@ name: Windows (VS 2019, Python 3.11, Debug) on: workflow_dispatch: merge_group: + pull_request: push: branches: - master @@ -25,7 +26,7 @@ jobs: target_branch: ${{ steps.set_target_branch.outputs.target_branch }} steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -52,10 +53,21 @@ jobs: if: "!needs.smart_ci.outputs.skip_workflow" uses: ./.github/workflows/job_build_windows.yml with: - runner: 'aks-win-16-cores-32gb' + runner: 'aks-win-32-cores-128gb' affected-components: ${{ needs.smart_ci.outputs.affected_components }} build-type: 'Debug' target-branch: ${{ needs.smart_ci.outputs.target_branch }} + cmake-options: |- + -G "Ninja" ` + -DENABLE_PYTHON=OFF ` + -DENABLE_CPPLINT=OFF ` + -DBUILD_SHARED_LIBS=ON ` + -DENABLE_TESTS=ON ` + -DCMAKE_COMPILE_WARNING_AS_ERROR=ON ` + -DENABLE_STRICT_DEPENDENCIES=OFF ` + -DCMAKE_DISABLE_FIND_PACKAGE_PkgConfig=ON ` + -DCUSTOM_OPERATIONS="calculate_grid;complex_mul;fft;grid_sample;sparse_conv;sparse_conv_transpose" ` + -DOPENVINO_EXTRA_MODULES="${env:OPENVINO_CONTRIB_REPO }}/modules/custom_operations;${env:OPENVINO_CONTRIB_REPO}/modules/java_api" CXX_Unit_Tests: name: C++ unit tests @@ -65,7 +77,9 @@ jobs: runner: 'aks-win-4-cores-8gb' affected-components: ${{ needs.smart_ci.outputs.affected_components }} os: 'windows_2019' - + build-type: 'Debug' + timeout-minutes: 60 + Overall_Status: name: ci/gha_overall_status_windows_debug needs: [ Smart_CI, Build, CXX_Unit_Tests ] @@ -78,4 +92,4 @@ jobs: contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }} - run: exit 1 + run: exit 1 \ No newline at end of file diff --git a/.github/workflows/windows_vs2019_release.yml b/.github/workflows/windows_vs2019_release.yml index bce90165408815..b9b8fa76d37c34 100644 --- a/.github/workflows/windows_vs2019_release.yml +++ b/.github/workflows/windows_vs2019_release.yml @@ -28,7 +28,7 @@ jobs: target_branch: ${{ steps.set_target_branch.outputs.target_branch }} steps: - name: checkout action - uses: 
actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -59,6 +59,17 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} build-type: 'Release' target-branch: ${{ needs.smart_ci.outputs.target_branch }} + cmake-options: |- + -G "Ninja Multi-Config" ` + -DENABLE_PYTHON=ON ` + -DENABLE_CPPLINT=OFF ` + -DBUILD_SHARED_LIBS=ON ` + -DENABLE_TESTS=ON ` + -DCMAKE_COMPILE_WARNING_AS_ERROR=ON ` + -DENABLE_STRICT_DEPENDENCIES=OFF ` + -DCMAKE_DISABLE_FIND_PACKAGE_PkgConfig=ON ` + -DCUSTOM_OPERATIONS="calculate_grid;complex_mul;fft;grid_sample;sparse_conv;sparse_conv_transpose" ` + -DOPENVINO_EXTRA_MODULES="${env:OPENVINO_CONTRIB_REPO }}/modules/custom_operations;${env:OPENVINO_CONTRIB_REPO}/modules/java_api" Samples: needs: [ Build, Smart_CI ] @@ -98,7 +109,7 @@ jobs: working-directory: ${{ env.INSTALL_DIR }} - name: Fetch setup_python action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python/action.yml @@ -165,7 +176,7 @@ jobs: steps: - name: Fetch OpenVINO JS sources - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | src/bindings/js @@ -178,7 +189,7 @@ jobs: path: ${{ env.OPENVINO_JS_LIBS_DIR }} - name: Setup Node ${{ env.NODE_VERSION }} - uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4 + uses: actions/setup-node@39370e3970a6d050c480ffad4ff0ed4d3fdee5af # v4.1.0 with: node-version: ${{ env.NODE_VERSION }} @@ -224,6 +235,7 @@ jobs: runner: 'aks-win-4-cores-8gb' shell: pwsh affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' if: fromJSON(needs.smart_ci.outputs.affected_components).TOKENIZERS Python_Unit_Tests: @@ -263,7 +275,7 @@ jobs: working-directory: ${{ env.INSTALL_DIR }} - name: Fetch setup_python action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python/action.yml @@ -424,6 +436,8 @@ jobs: runner: 'aks-win-4-cores-8gb' affected-components: ${{ needs.smart_ci.outputs.affected_components }} os: 'windows_2019' + build-type: 'Release' + timeout-minutes: 50 CPU_Functional_Tests: name: CPU functional tests @@ -463,7 +477,7 @@ jobs: popd - name: Fetch setup_python action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python/action.yml @@ -481,7 +495,7 @@ jobs: run: python3 -m pip install -r ${{ github.workspace }}\install\tests\functional_test_utils\layer_tests_summary\requirements.txt - name: Restore tests execution time - uses: actions/cache/restore@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1 + uses: actions/cache/restore@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 with: path: ${{ env.PARALLEL_TEST_CACHE }} key: ${{ runner.os }}-tests-functional-cpu-stamp-${{ github.sha }} @@ -495,7 +509,7 @@ jobs: timeout-minutes: 60 - name: Save tests execution time - uses: actions/cache/save@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1 + uses: 
actions/cache/save@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 if: github.ref_name == 'master' with: path: ${{ env.PARALLEL_TEST_CACHE }} diff --git a/.github/workflows/workflow_rerunner.yml b/.github/workflows/workflow_rerunner.yml index 7da00df4e46d32..55ecc2500635b1 100644 --- a/.github/workflows/workflow_rerunner.yml +++ b/.github/workflows/workflow_rerunner.yml @@ -3,11 +3,17 @@ name: Rerun Workflow with Known Errors on: workflow_run: workflows: - - Linux (Ubuntu 20.04, Python 3.11) + - Linux (Ubuntu 20.04, Python 3.9) + - Linux (Ubuntu 22.04, Python 3.11) + - Linux (Ubuntu 24.04, Python 3.12) + - Debian 10 ARM + - Android ARM64 with vcpkg + - Android x64 - Linux ARM64 (Ubuntu 20.04, Python 3.11) - Linux Static CC (Ubuntu 22.04, Python 3.11, Clang) - Linux RISC-V with Conan (Ubuntu 22.04, Python 3.10) - - Windows (VS 2019, Python 3.11) + - Windows (VS 2019, Python 3.11, Release) + - Windows (VS 2019, Python 3.11, Debug) - Windows Conditional Compilation (VS 2022, Python 3.11) types: - completed @@ -31,7 +37,7 @@ jobs: checks: read steps: - name: Checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: '.github/scripts/workflow_rerun' @@ -56,13 +62,17 @@ jobs: if: ${{ env.PIPELINE_RETRIGGERED == 'true' }} run: echo "Rerun retriggered for ${{ github.event.workflow_run.html_url }} with ticket ${{ env.FOUND_ERROR_TICKET }}" + - name: ${{ github.event.workflow_run.html_url }} + if: ${{ env.PIPELINE_RETRIGGERED == 'true' }} + run: echo "Step for statistics gathering" + rerunner_tests: name: Rerunner Tests if: ${{ github.event_name == 'pull_request' && github.repository_owner == 'openvinotoolkit' }} runs-on: aks-linux-2-cores-8gb steps: - name: Checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: '.github/scripts/workflow_rerun' lfs: true diff --git a/SECURITY.md b/SECURITY.md index eb482d90983db3..1840023d700663 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -1,5 +1,10 @@ # Security Policy +## Security practices + +[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/9611/badge)](https://www.bestpractices.dev/projects/9611) +[![Coverity](https://scan.coverity.com/projects/21921/badge.svg)](https://scan.coverity.com/projects/openvino) + ## Report a Vulnerability Please report security issues or vulnerabilities to the [Intel® Security Center]. 
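The rerunner above retries a completed workflow run only when its logs contain a known, ticketed error. A minimal sketch of the underlying GitHub REST call such a script relies on is shown below; the error-to-ticket mapping and the function name are illustrative assumptions (the real logic lives in `.github/scripts/workflow_rerun`), and the `GITHUB_TOKEN` needs permission to re-run workflow runs.

```python
# Minimal sketch: re-run only the failed jobs of a workflow run when a known,
# ticketed error string is found in its logs. KNOWN_ERRORS is illustrative only.
import os
from typing import Optional

import requests

KNOWN_ERRORS = {
    "Connection reset by peer": "TICKET-123",  # hypothetical mapping
}

def rerun_if_known_error(repo: str, run_id: int, log_text: str) -> Optional[str]:
    """Return the matched ticket if a rerun was triggered, otherwise None."""
    token = os.environ["GITHUB_TOKEN"]
    for error, ticket in KNOWN_ERRORS.items():
        if error in log_text:
            resp = requests.post(
                f"https://api.github.com/repos/{repo}/actions/runs/{run_id}/rerun-failed-jobs",
                headers={
                    "Authorization": f"Bearer {token}",
                    "Accept": "application/vnd.github+json",
                },
                timeout=30,
            )
            resp.raise_for_status()
            return ticket
    return None
```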
diff --git a/cmake/developer_package/frontends/frontends.cmake b/cmake/developer_package/frontends/frontends.cmake index d2aa0410476245..0815297a11a5eb 100644 --- a/cmake/developer_package/frontends/frontends.cmake +++ b/cmake/developer_package/frontends/frontends.cmake @@ -304,6 +304,9 @@ macro(ov_add_frontend) # then we need to mark it to be CXX ABI free ov_abi_free_target(${TARGET_NAME}) + # public target name + set_target_properties(${TARGET_NAME} PROPERTIES EXPORT_NAME frontend::${OV_FRONTEND_NAME}) + # installation if(NOT OV_FRONTEND_SKIP_INSTALL) @@ -351,9 +354,6 @@ macro(ov_add_frontend) COMPONENT ${dev_component} ${OV_CPACK_COMP_CORE_DEV_EXCLUDE_ALL} FILES_MATCHING PATTERN "*.hpp") - - # public target name - set_target_properties(${TARGET_NAME} PROPERTIES EXPORT_NAME frontend::${OV_FRONTEND_NAME}) endif() else() # skipped frontend has to be installed in static libraries case diff --git a/cmake/developer_package/packaging/common-libraries.cmake b/cmake/developer_package/packaging/common-libraries.cmake index 0ec054da853e2c..4ac0124d3089f0 100644 --- a/cmake/developer_package/packaging/common-libraries.cmake +++ b/cmake/developer_package/packaging/common-libraries.cmake @@ -30,7 +30,7 @@ macro(ov_common_libraries_cpack_set_dirs) ov_get_pyversion(pyversion) if(pyversion) - # should not be used in production; only by setup.py install + # should not be used in production; only by pip install set(OV_CPACK_PYTHONDIR lib/${pyversion}/site-packages) endif() @@ -94,7 +94,7 @@ macro(ov_define_component_include_rules) set(OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL EXCLUDE_FROM_ALL) set(OV_CPACK_COMP_BENCHMARK_APP_EXCLUDE_ALL ${OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL}) set(OV_CPACK_COMP_OVC_EXCLUDE_ALL ${OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL}) - # we don't pack artifacts of setup.py install, because it's called explicitly in conda / brew + # we don't pack artifacts of pip install, because it's called explicitly in conda / brew # or not used at all like in cases with conan / vcpkg set(OV_CPACK_COMP_PYTHON_OPENVINO_PACKAGE_EXCLUDE_ALL ${OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL}) # we don't need wheels in the distribution packages diff --git a/cmake/developer_package/packaging/debian/debian.cmake b/cmake/developer_package/packaging/debian/debian.cmake index 2b95fcfde5c145..a23d5290044e3d 100644 --- a/cmake/developer_package/packaging/debian/debian.cmake +++ b/cmake/developer_package/packaging/debian/debian.cmake @@ -95,12 +95,12 @@ macro(ov_define_component_include_rules) endif() # python if(ENABLE_PYTHON_PACKAGING) - # pack artifacts of setup.py install + # pack artifacts of pip install unset(OV_CPACK_COMP_PYTHON_OPENVINO_PACKAGE_EXCLUDE_ALL) else() set(OV_CPACK_COMP_PYTHON_OPENVINO_PACKAGE_EXCLUDE_ALL EXCLUDE_FROM_ALL) endif() - # we don't pack python components itself, we pack artifacts of setup.py install + # we don't pack python components itself, we pack artifacts of pip install set(OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL EXCLUDE_FROM_ALL) set(OV_CPACK_COMP_BENCHMARK_APP_EXCLUDE_ALL ${OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL}) set(OV_CPACK_COMP_OVC_EXCLUDE_ALL ${OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL}) diff --git a/cmake/developer_package/packaging/rpm/rpm.cmake b/cmake/developer_package/packaging/rpm/rpm.cmake index 45d9b0c0ca2121..bb4e7942d7640b 100644 --- a/cmake/developer_package/packaging/rpm/rpm.cmake +++ b/cmake/developer_package/packaging/rpm/rpm.cmake @@ -86,12 +86,12 @@ macro(ov_define_component_include_rules) endif() # python if(ENABLE_PYTHON_PACKAGING) - # pack 
artifacts of setup.py install + # pack artifacts of pip install unset(OV_CPACK_COMP_PYTHON_OPENVINO_PACKAGE_EXCLUDE_ALL) else() set(OV_CPACK_COMP_PYTHON_OPENVINO_PACKAGE_EXCLUDE_ALL EXCLUDE_FROM_ALL) endif() - # we don't pack python components itself, we pack artifacts of setup.py install + # we don't pack python components itself, we pack artifacts of pip install set(OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL EXCLUDE_FROM_ALL) set(OV_CPACK_COMP_BENCHMARK_APP_EXCLUDE_ALL ${OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL}) set(OV_CPACK_COMP_OVC_EXCLUDE_ALL ${OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL}) diff --git a/cmake/toolchains/mt.runtime.win32.toolchain.cmake b/cmake/toolchains/mt.runtime.win32.toolchain.cmake index 7dd4e1e7f96ded..b331d370bfe7bf 100644 --- a/cmake/toolchains/mt.runtime.win32.toolchain.cmake +++ b/cmake/toolchains/mt.runtime.win32.toolchain.cmake @@ -27,6 +27,11 @@ if(use_static_runtime) foreach(build_type "" "_DEBUG" "_MINSIZEREL" "_RELEASE" "_RELWITHDEBINFO") set(flag_var "CMAKE_${lang}_FLAGS${build_type}_INIT") string(REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") + if (build_type STREQUAL "_DEBUG") + set(${flag_var} "/MTd") + else() + set(${flag_var} "/MT") + endif() endforeach() endforeach() endif() diff --git a/cmake/toolchains/riscv64-071-thead-gnu.toolchain.cmake b/cmake/toolchains/riscv64-071-xuantie-gnu.toolchain.cmake similarity index 95% rename from cmake/toolchains/riscv64-071-thead-gnu.toolchain.cmake rename to cmake/toolchains/riscv64-071-xuantie-gnu.toolchain.cmake index f5e9e68aabedc6..5bc16de8df91e8 100644 --- a/cmake/toolchains/riscv64-071-thead-gnu.toolchain.cmake +++ b/cmake/toolchains/riscv64-071-xuantie-gnu.toolchain.cmake @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 # -# NOTE: use T-Head compiler: +# NOTE: use Xuantie compiler: # git clone https://github.com/XUANTIE-RV/xuantie-gnu-toolchain.git # ./configure --prefix=/opt/riscv # make linux @@ -22,10 +22,10 @@ set(CMAKE_SYSTEM_NAME Linux) set(CMAKE_SYSTEM_PROCESSOR riscv64) -set(RISCV64_THEAD ON) +set(RISCV64_XUANTIE ON) set(RISCV64_RVV0p7 ON) -set(RISCV_TOOLCHAIN_ROOT $ENV{RISCV_TOOLCHAIN_ROOT} CACHE PATH "Path to CLANG for RISC-V cross compiler build directory") +set(RISCV_TOOLCHAIN_ROOT $ENV{RISCV_TOOLCHAIN_ROOT} CACHE PATH "Path to GCC for RISC-V cross compiler build directory") set(CMAKE_SYSROOT "${RISCV_TOOLCHAIN_ROOT}/sysroot" CACHE PATH "RISC-V sysroot") set(CMAKE_C_COMPILER ${RISCV_TOOLCHAIN_ROOT}/bin/riscv64-unknown-linux-gnu-gcc) diff --git a/cmake/toolchains/riscv64-100-thead-gnu.toolchain.cmake b/cmake/toolchains/riscv64-100-xuantie-gnu.toolchain.cmake similarity index 95% rename from cmake/toolchains/riscv64-100-thead-gnu.toolchain.cmake rename to cmake/toolchains/riscv64-100-xuantie-gnu.toolchain.cmake index e00e30f975598f..0664b38a9ba68d 100644 --- a/cmake/toolchains/riscv64-100-thead-gnu.toolchain.cmake +++ b/cmake/toolchains/riscv64-100-xuantie-gnu.toolchain.cmake @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 # -# NOTE: use T-Head compiler: +# NOTE: use Xuantie compiler: # git clone https://github.com/XUANTIE-RV/xuantie-gnu-toolchain.git # ./configure --prefix=/opt/riscv # make linux @@ -22,10 +22,10 @@ set(CMAKE_SYSTEM_NAME Linux) set(CMAKE_SYSTEM_PROCESSOR riscv64) -set(RISCV64_THEAD ON) +set(RISCV64_XUANTIE ON) set(RISCV64_RVV1p0 ON) -set(RISCV_TOOLCHAIN_ROOT $ENV{RISCV_TOOLCHAIN_ROOT} CACHE PATH "Path to CLANG for RISC-V cross compiler build directory") +set(RISCV_TOOLCHAIN_ROOT $ENV{RISCV_TOOLCHAIN_ROOT} CACHE PATH "Path to GCC for RISC-V cross compiler build 
directory") set(CMAKE_SYSROOT "${RISCV_TOOLCHAIN_ROOT}/sysroot" CACHE PATH "RISC-V sysroot") set(CMAKE_C_COMPILER ${RISCV_TOOLCHAIN_ROOT}/bin/riscv64-unknown-linux-gnu-gcc) diff --git a/cmake/toolchains/riscv64-gnu.toolchain.cmake b/cmake/toolchains/riscv64-gnu.toolchain.cmake index 994b05f66b52f6..b58dcf169fc2da 100644 --- a/cmake/toolchains/riscv64-gnu.toolchain.cmake +++ b/cmake/toolchains/riscv64-gnu.toolchain.cmake @@ -2,12 +2,12 @@ # SPDX-License-Identifier: Apache-2.0 # -# NOTE: use with the following docker image https://github.com/Incarnation-p-lee/riscv-docker-emulator#llvm-clang-tool-chain +# NOTE: use with the following docker image https://github.com/Incarnation-p-lee/riscv-docker-emulator#gnu-toolchain set(CMAKE_SYSTEM_NAME Linux) set(CMAKE_SYSTEM_PROCESSOR riscv64) -set(RISCV_TOOLCHAIN_ROOT "/opt/riscv/gnu-toolchain/rv64-linux" CACHE PATH "Path to CLANG for RISC-V cross compiler build directory") +set(RISCV_TOOLCHAIN_ROOT "/opt/riscv/gnu-toolchain/rv64-linux" CACHE PATH "Path to GCC for RISC-V cross compiler build directory") set(CMAKE_SYSROOT "${RISCV_TOOLCHAIN_ROOT}/sysroot" CACHE PATH "RISC-V sysroot") set(CMAKE_C_COMPILER_TARGET riscv64-unknown-linux-gnu) @@ -26,9 +26,6 @@ set(CMAKE_OBJDUMP ${RISCV_TOOLCHAIN_ROOT}/bin/riscv64-unknown-linux-gnu-objdump) set(CMAKE_READELF ${RISCV_TOOLCHAIN_ROOT}/bin/riscv64-unknown-linux-gnu-readelf) set(PKG_CONFIG_EXECUTABLE "NOT-FOUND" CACHE PATH "Path to RISC-V pkg-config") -# Don't run the linker on compiler check -set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) - set(CMAKE_SHARED_LINKER_FLAGS_INIT "-L${CMAKE_SYSROOT}/lib") set(CMAKE_EXE_LINKER_FLAGS_INIT "-L${CMAKE_SYSROOT}/lib") set(CMAKE_MODULE_LINKER_FLAGS_INIT "-L${CMAKE_SYSROOT}/lib") diff --git a/cmake/toolchains/riscv64.linux.toolchain.cmake b/cmake/toolchains/riscv64.linux.toolchain.cmake new file mode 100644 index 00000000000000..cb088f5eca5052 --- /dev/null +++ b/cmake/toolchains/riscv64.linux.toolchain.cmake @@ -0,0 +1,13 @@ +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +# Install compiler on debian using: +# apt-get install -y gcc-riscv64-linux-gnu g++-riscv64-linux-gnu binutils-riscv64-linux-gnu + +set(CMAKE_SYSTEM_NAME Linux) +set(CMAKE_SYSTEM_PROCESSOR riscv64) + +set(CMAKE_C_COMPILER riscv64-linux-gnu-gcc) +set(CMAKE_CXX_COMPILER riscv64-linux-gnu-g++) +set(CMAKE_STRIP riscv64-linux-gnu-strip) diff --git a/conan.lock b/conan.lock index a21a2a8d7b52f8..f78ad37f8f69ac 100644 --- a/conan.lock +++ b/conan.lock @@ -10,7 +10,7 @@ "opencl-icd-loader/2023.04.17#5f73dd9f0c023d416a7f162e320b9c77%1692732261.088", "opencl-headers/2023.04.17#3d98f2d12a67c2400de6f11d5335b5a6%1683936272.16", "opencl-clhpp-headers/2023.04.17#7c62fcc7ac2559d4839150d2ebaac5c8%1685450803.672", - "onnx/1.16.0#4d2d4f24d6f73b8a7551e001839631f0%1712404811.278", + "onnx/1.17.0#c79fdfca3ae149874153de15a20f4598%1727864447.241", "onetbb/2021.10.0#cbb2fc43088070b48f6e4339bc8fa0e1%1693812561.235", "ittapi/3.24.0#9246125f13e7686dee2b0c992b71db94%1682969872.743", "hwloc/2.9.2#1c63e2eccac57048ae226e6c946ebf0e%1688677682.002", @@ -33,4 +33,4 @@ ], "python_requires": [], "config_requires": [] -} \ No newline at end of file +} diff --git a/conanfile.txt b/conanfile.txt index f124179d52bf12..46d6d8d65d34e9 100644 --- a/conanfile.txt +++ b/conanfile.txt @@ -7,7 +7,7 @@ opencl-icd-loader/[>=2023.04.17] rapidjson/[>=1.1.0] xbyak/[>=6.62] snappy/[>=1.1.7] -onnx/1.16.0 +onnx/1.17.0 pybind11/[>=2.12.0] flatbuffers/[>=22.9.24] diff --git 
a/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst b/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst index b9f15bbe12cccc..a49a9d8dba81d1 100644 --- a/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst +++ b/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst @@ -57,21 +57,21 @@ Feature Support and API Coverage +-------------------------+-----------+------------------+-------------------+ | **API Coverage:** | plugin | infer_request | compiled_model | +=========================+===========+==================+===================+ -| CPU | 80.0 % | 100.0 % | 89.74 % | +| CPU | 98.31 % | 100.0 % | 90.7 % | +-------------------------+-----------+------------------+-------------------+ | CPU_ARM | 80.0 % | 100.0 % | 89.74 % | +-------------------------+-----------+------------------+-------------------+ -| GPU | 84.0 % | 100.0 % | 100.0 % | +| GPU | 91.53 % | 100.0 % | 100.0 % | +-------------------------+-----------+------------------+-------------------+ -| dGPU | 82.0 % | 100.0 % | 100.0 % | +| dGPU | 89.83 % | 100.0 % | 100.0 % | +-------------------------+-----------+------------------+-------------------+ -| NPU | 16.0 % | 0.0 % | 10.26 % | +| NPU | 18.64 % | 0.0 % | 9.3 % | +-------------------------+-----------+------------------+-------------------+ -| AUTO | 40.0 % | 100.0 % | 97.44 % | +| AUTO | 93.88 % | 100.0 % | 100.0 % | +-------------------------+-----------+------------------+-------------------+ -| BATCH | 26.0 % | 100.0 % | 58.97 % | +| BATCH | 86.05 % | 100.0 % | 86.05 % | +-------------------------+-----------+------------------+-------------------+ -| HETERO | 30.0 % | 99.23 % | 58.97 % | +| HETERO | 61.22 % | 99.24 % | 86.05 % | +-------------------------+-----------+------------------+-------------------+ | || Percentage of API supported by the device, | | || as of OpenVINO 2023.3, 08 Jan, 2024. | diff --git a/docs/articles_en/about-openvino/compatibility-and-support/supported-operations.rst b/docs/articles_en/about-openvino/compatibility-and-support/supported-operations.rst index 5bd414776784ce..d27f7626391f46 100644 --- a/docs/articles_en/about-openvino/compatibility-and-support/supported-operations.rst +++ b/docs/articles_en/about-openvino/compatibility-and-support/supported-operations.rst @@ -9,6 +9,8 @@ Here, you will find comprehensive information on operations supported by OpenVIN conformance reports provide operation coverage for inference devices, while the tables list operations available for all OpenVINO framework frontends. +Data as of OpenVINO 2024.4, 18 Oct. 2024. + **Device-operation conformance reports:** .. grid:: 1 1 2 2 @@ -32,6 +34,7 @@ operations available for all OpenVINO framework frontends. ops including dynamic inputs + **Operations supported by OpenVINO frontend Frameworks:** .. tab-set:: diff --git a/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst b/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst index 39b27d12c970fd..b8256af650e2f8 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst @@ -3,9 +3,11 @@ Most Efficient Large Language Models for AI PC This page is regularly updated to help you identify the best-performing LLMs on the Intel® Core™ Ultra processor family and AI PCs. 
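The feature and API coverage table above measures three API surfaces per device: plugin, infer_request, and compiled_model. A minimal sketch of the calls those columns correspond to in the Python API, assuming a single static-shaped float32 input and using a placeholder model path:

```python
import numpy as np
import openvino as ov

core = ov.Core()                              # plugin-level API: device discovery, properties
print(core.available_devices)                 # e.g. ['CPU', 'GPU']

model = core.read_model("model.xml")          # placeholder path to an OpenVINO IR model
compiled = core.compile_model(model, "CPU")   # compiled_model-level API
request = compiled.create_infer_request()     # infer_request-level API

dummy = np.zeros(tuple(compiled.input(0).shape), dtype=np.float32)
request.infer({0: dummy})
print(request.get_output_tensor(0).data)
```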
+The current data is as of OpenVINO 2024.4, 24 Oct. 2024 -The tables below list key performance indicators for a selection of Large Language Models, -running on an Intel® Core™ Ultra 7-165H based system, on built-in GPUs. +The tables below list the key performance indicators for a selection of Large Language Models, +running on an Intel® Core™ Ultra 7-165H, Intel® Core™ Ultra 7-265V, and Intel® Core™ Ultra +7-288V based system, on built-in GPUs. @@ -15,16 +17,13 @@ running on an Intel® Core™ Ultra 7-165H based system, on built-in GPUs. -.. tab-set:: - - .. tab-item:: OpenVINO - - .. csv-table:: - :class: modeldata stripe - :name: supportedModelsTableOv - :header-rows: 1 - :file: ../../_static/benchmarks_files/llm_models.csv +.. csv-table:: + :class: modeldata stripe + :name: supportedModelsTableOv + :header-rows: 1 + :file: ../../_static/benchmarks_files/llm_models.csv +| .. grid:: 1 1 2 2 :gutter: 4 @@ -34,18 +33,17 @@ running on an Intel® Core™ Ultra 7-165H based system, on built-in GPUs. All models listed here were tested with the following parameters: * Framework: PyTorch - * Model precision: INT4 * Beam: 1 * Batch size: 1 .. grid-item:: - .. button-link:: https://docs.openvino.ai/2024/_static/benchmarks_files/OV-2024.4-platform_list.pdf + .. button-link:: https://docs.openvino.ai/2024/_static/benchmarks_files/llm_models_platform_list_.pdf :color: primary :outline: :expand: - :material-regular:`download;1.5em` Get full system info [PDF] + :material-regular:`download;1.5em` Get system descriptions [PDF] .. button-link:: ../../_static/benchmarks_files/llm_models.csv :color: primary diff --git a/docs/articles_en/about-openvino/performance-benchmarks/getting-performance-numbers.rst b/docs/articles_en/about-openvino/performance-benchmarks/getting-performance-numbers.rst index e35d42a6a02abc..936f1145a6b3b0 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks/getting-performance-numbers.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks/getting-performance-numbers.rst @@ -41,7 +41,7 @@ dedicated only to measuring performance. The benchmark setup for OVMS consists of four main parts: - .. image:: ../assets/images/performance_benchmarks_ovms_02.png + .. 
image:: ../../assets/images/performance_benchmarks_ovms_02.png :alt: OVMS Benchmark Setup Diagram * **OpenVINO™ Model Server** is launched as a docker container on the server platform and it diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets.rst index a3028755299b45..ed77c97e21b0ee 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets.rst @@ -10,6 +10,7 @@ Available Operation Sets :maxdepth: 1 :hidden: + available-opsets/opset15 available-opsets/opset14 available-opsets/opset13 available-opsets/opset12 @@ -34,7 +35,9 @@ This topic provides a complete list of available sets of operations supported in :header-rows: 1 * - OpenVINO™ Version - - Actual Operations Set + - Operation Set + * - 2024.5 + - :doc:`opset15 ` * - 2024.0 - :doc:`opset14 ` * - 2023.2 @@ -71,4 +74,3 @@ See Also * :doc:`Operation Sets in OpenVINO <../operation-sets>` * :doc:`OpenVINO IR format <../../openvino-ir-format>` - diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset15.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset15.rst new file mode 100644 index 00000000000000..b32d7b71580177 --- /dev/null +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset15.rst @@ -0,0 +1,216 @@ +opset15 +======= + + +.. meta:: + :description: Explore the examples of operation instances expressed as IR + XML snippets in the opset15 operation set, supported in OpenVINO™ + toolkit. + +This specification document describes the ``opset15`` operation set supported in OpenVINO™. +Support for each particular operation from the list below depends on the capabilities of an inference plugin +and may vary among different hardware platforms and devices. Examples of operation instances are provided as IR xml +snippets. The semantics match corresponding OpenVINO operation classes declared in ``namespace opset15``. 
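Because the semantics of ``opset15`` match the classes in ``namespace opset15``, a minimal sketch of building a model from these operations through the Python API is given below; it assumes the 2024.5 Python bindings expose ``opset15`` the same way earlier opsets such as ``opset14`` are exposed.

```python
import numpy as np
import openvino as ov
import openvino.runtime.opset15 as ops  # assumed to mirror earlier opset modules

# Tiny model: Parameter -> Abs -> Result, all operations taken from opset15.
x = ops.parameter([2, 3], dtype=np.float32, name="x")
y = ops.abs(x)
model = ov.Model([y], [x], "abs_demo")

compiled = ov.compile_model(model, device_name="CPU")
data = np.array([[-1.0, 2.0, -3.0], [4.0, -5.0, 6.0]], dtype=np.float32)
print(compiled(data)[0])  # absolute values of the input
```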
+ + +Table of Contents +################## + +* :doc:`Abs <../operation-specs/arithmetic/abs-1>` +* :doc:`Acos <../operation-specs/arithmetic/acos-1>` +* :doc:`Acosh <../operation-specs/arithmetic/acosh-3>` +* :doc:`AdaptiveAvgPool <../operation-specs/pooling/adaptive-avg-pool-8>` +* :doc:`AdaptiveMaxPool <../operation-specs/pooling/adaptive-max-pool-8>` +* :doc:`Add <../operation-specs/arithmetic/add-1>` +* :doc:`Asin <../operation-specs/arithmetic/asin-1>` +* :doc:`Asinh <../operation-specs/arithmetic/asinh-3>` +* :doc:`Assign <../operation-specs/infrastructure/assign-3>` +* :doc:`Atan <../operation-specs/arithmetic/atan-1>` +* :doc:`Atanh <../operation-specs/arithmetic/atanh-3>` +* :doc:`AvgPool <../operation-specs/pooling/avg-pool-14>` +* :doc:`BatchNormInference <../operation-specs/normalization/batch-norm-inference-5>` +* :doc:`BatchToSpace <../operation-specs/movement/batch-to-space-2>` +* :doc:`BinaryConvolution <../operation-specs/convolution/binary-convolution-1>` +* :doc:`BitwiseAnd <../operation-specs/bitwise/bitwise-and-13>` +* :doc:`BitwiseOr <../operation-specs/bitwise/bitwise-or-13>` +* :doc:`BitwiseXor <../operation-specs/bitwise/bitwise-xor-13>` +* :doc:`BitwiseLeftShift <../operation-specs/bitwise/bitwise-left-shift-15>` +* :doc:`BitwiseRightShift <../operation-specs/bitwise/bitwise-right-shift-15>` +* :doc:`BitwiseNot <../operation-specs/bitwise/bitwise-not-13>` +* :doc:`Broadcast <../operation-specs/movement/broadcast-3>` +* :doc:`Bucketize <../operation-specs/condition/bucketize-3>` +* :doc:`CTCGreedyDecoder <../operation-specs/sequence/ctc-greedy-decoder-1>` +* :doc:`CTCGreedyDecoderSeqLen <../operation-specs/sequence/ctc-greedy-decoder-seq-len-6>` +* :doc:`CTCLoss <../operation-specs/sequence/ctc-loss-4>` +* :doc:`Ceiling <../operation-specs/arithmetic/ceiling-1>` +* :doc:`Clamp <../operation-specs/activation/clamp-1>` +* :doc:`Col2Im <../operation-specs/movement/col2im-15>` +* :doc:`Concat <../operation-specs/movement/concat-1>` +* :doc:`Constant <../operation-specs/infrastructure/constant-1>` +* :doc:`Convert <../operation-specs/type/convert-1>` +* :doc:`ConvertLike <../operation-specs/type/convert-like-1>` +* :doc:`ConvertPromoteTypes <../operation-specs/type/convert-promote-types-14>` +* :doc:`Convolution <../operation-specs/convolution/convolution-1>` +* :doc:`ConvolutionBackpropData <../operation-specs/convolution/convolution-backprop-data-1>` +* :doc:`Cos <../operation-specs/arithmetic/cos-1>` +* :doc:`Cosh <../operation-specs/arithmetic/cosh-1>` +* :doc:`CumSum <../operation-specs/arithmetic/cumsum-3>` +* :doc:`DeformableConvolution <../operation-specs/convolution/deformable-convolution-8>` +* :doc:`DeformablePSROIPooling <../operation-specs/detection/deformable-psroi-pooling-1>` +* :doc:`DepthToSpace <../operation-specs/movement/depth-to-space-1>` +* :doc:`DetectionOutput <../operation-specs/detection/detectionoutput-8>` +* :doc:`DFT <../operation-specs/signals/dft-7>` +* :doc:`Divide <../operation-specs/arithmetic/divide-1>` +* :doc:`Einsum <../operation-specs/matrix/einsum-7>` +* :doc:`Elu <../operation-specs/activation/elu-1>` +* :doc:`EmbeddingBagOffsetsSum <../operation-specs/sparse/embedding-bag-offsets-sum-3>` +* :doc:`EmbeddingBagOffsets <../operation-specs/sparse/embedding-bag-offsets-15>` +* :doc:`EmbeddingBagPackedSum <../operation-specs/sparse/embedding-bag-packed-sum-3>` +* :doc:`EmbeddingBagPacked <../operation-specs/sparse/embedding-bag-packed-15>` +* :doc:`EmbeddingSegmentsSum <../operation-specs/sparse/embedding-segments-sum-3>` +* 
:doc:`Equal <../operation-specs/comparison/equal-1>` +* :doc:`Erf <../operation-specs/arithmetic/erf-1>` +* :doc:`Exp <../operation-specs/activation/exp-1>` +* :doc:`ExperimentalDetectronDetectionOutput_6 <../operation-specs/detection/experimental-detectron-detection-output-6>` +* :doc:`ExperimentalDetectronGenerateProposalsSingleImage_6 <../operation-specs/detection/experimental-detectron-generate-proposals-single-image-6>` +* :doc:`ExperimentalDetectronPriorGridGenerator_6 <../operation-specs/detection/experimental-detectron-prior-grid-generator-6>` +* :doc:`ExperimentalDetectronROIFeatureExtractor_6 <../operation-specs/detection/experimental-detectron-roi-feature-extractor-6>` +* :doc:`ExperimentalDetectronTopKROIs_6 <../operation-specs/sort/experimental-detectron-top-krois-6>` +* :doc:`ExtractImagePatches <../operation-specs/movement/extract-image-patches-3>` +* :doc:`Eye <../operation-specs/generation/eye-9>` +* :doc:`FakeConvert <../operation-specs/quantization/fake-convert-13>` +* :doc:`FakeQuantize <../operation-specs/quantization/fake-quantize-1>` +* :doc:`Floor <../operation-specs/arithmetic/floor-1>` +* :doc:`FloorMod <../operation-specs/arithmetic/floormod-1>` +* :doc:`Gather <../operation-specs/movement/gather-8>` +* :doc:`GatherElements <../operation-specs/movement/gather-elements-6>` +* :doc:`GatherND <../operation-specs/movement/gather-nd-8>` +* :doc:`GatherTree <../operation-specs/movement/gather-tree-1>` +* :doc:`Gelu <../operation-specs/activation/gelu-7>` +* :doc:`GenerateProposals <../operation-specs/detection/generate-proposals-9>` +* :doc:`Greater <../operation-specs/comparison/greater-1>` +* :doc:`GreaterEqual <../operation-specs/comparison/greater-equal-1>` +* :doc:`GridSample <../operation-specs/image/grid-sample-9>` +* :doc:`GRN <../operation-specs/normalization/grn-1>` +* :doc:`GroupConvolution <../operation-specs/convolution/group-convolution-1>` +* :doc:`GroupConvolutionBackpropData <../operation-specs/convolution/group-convolution-backprop-data-1>` +* :doc:`GroupNormalization <../operation-specs/normalization/group-normalization-12>` +* :doc:`GRUCell <../operation-specs/sequence/gru-cell-3>` +* :doc:`GRUSequence <../operation-specs/sequence/gru-sequence-5>` +* :doc:`HardSigmoid <../operation-specs/activation/hard-sigmoid-1>` +* :doc:`HSigmoid <../operation-specs/activation/hsigmoid-5>` +* :doc:`HSwish <../operation-specs/activation/hswish-4>` +* :doc:`IDFT <../operation-specs/signals/idft-7>` +* :doc:`I420toBGR <../operation-specs/image/i420-to-bgr-8>` +* :doc:`I420toRGB <../operation-specs/image/i420-to-rgb-8>` +* :doc:`If <../operation-specs/condition/if-8>` +* :doc:`Interpolate <../operation-specs/image/interpolate-11>` +* :doc:`Inverse <../operation-specs/matrix/inverse-14>` +* :doc:`IRDFT <../operation-specs/signals/irdft-9>` +* :doc:`IsInf <../operation-specs/comparison/isinf-10>` +* :doc:`IsNaN <../operation-specs/comparison/isnan-10>` +* :doc:`Less <../operation-specs/comparison/less-1>` +* :doc:`LessEqual <../operation-specs/comparison/lessequal-1>` +* :doc:`Log <../operation-specs/arithmetic/log-1>` +* :doc:`LogicalAnd <../operation-specs/logical/logical-and-1>` +* :doc:`LogicalNot <../operation-specs/logical/logical-not-1>` +* :doc:`LogicalOr <../operation-specs/logical/logical-or-1>` +* :doc:`LogicalXor <../operation-specs/logical/logical-xor-1>` +* :doc:`LogSoftmax <../operation-specs/activation/log-soft-max-5>` +* :doc:`Loop <../operation-specs/infrastructure/loop-5>` +* :doc:`LRN <../operation-specs/normalization/lrn-1>` +* :doc:`LSTMCell 
<../operation-specs/sequence/lstm-cell-1>` +* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-1>` +* :doc:`MatMul <../operation-specs/matrix/matmul-1>` +* :doc:`MatrixNMS <../operation-specs/sort/matrix-non-max-suppression-8>` +* :doc:`MaxPool <../operation-specs/pooling/max-pool-14>` +* :doc:`Maximum <../operation-specs/arithmetic/maximum-1>` +* :doc:`Minimum <../operation-specs/arithmetic/minimum-1>` +* :doc:`Mish <../operation-specs/activation/mish-4>` +* :doc:`Mod <../operation-specs/arithmetic/mod-1>` +* :doc:`MVN <../operation-specs/normalization/mvn-6>` +* :doc:`MulticlassNMS <../operation-specs/sort/multiclass-non-max-suppression-9>` +* :doc:`Multinomial <../operation-specs/generation/multinomial-13>` +* :doc:`Multiply <../operation-specs/arithmetic/multiply-1>` +* :doc:`Negative <../operation-specs/arithmetic/negative-1>` +* :doc:`NMSRotated <../operation-specs/sort/nms-rotated-13>` +* :doc:`NonMaxSuppression <../operation-specs/sort/non-max-suppression-9>` +* :doc:`NonZero <../operation-specs/condition/nonzero-3>` +* :doc:`NormalizeL2 <../operation-specs/normalization/normalize-l2-1>` +* :doc:`NotEqual <../operation-specs/comparison/notequal-1>` +* :doc:`NV12toBGR <../operation-specs/image/nv12-to-bgr-8>` +* :doc:`NV12toRGB <../operation-specs/image/nv12-to-rgb-8>` +* :doc:`OneHot <../operation-specs/sequence/one-hot-1>` +* :doc:`Pad <../operation-specs/movement/pad-12>` +* :doc:`Parameter <../operation-specs/infrastructure/parameter-1>` +* :doc:`Power <../operation-specs/arithmetic/power-1>` +* :doc:`PReLU <../operation-specs/activation/prelu-1>` +* :doc:`PriorBoxClustered <../operation-specs/detection/prior-box-clustered-1>` +* :doc:`PriorBox <../operation-specs/detection/prior-box-8>` +* :doc:`Proposal <../operation-specs/detection/proposal-4>` +* :doc:`PSROIPooling <../operation-specs/detection/psroi-pooling-1>` +* :doc:`RandomUniform <../operation-specs/generation/random-uniform-8>` +* :doc:`Range <../operation-specs/generation/range-4>` +* :doc:`RDFT <../operation-specs/signals/rdft-9>` +* :doc:`ReLU <../operation-specs/activation/relu-1>` +* :doc:`ReadValue <../operation-specs/infrastructure/read-value-3>` +* :doc:`ReduceL1 <../operation-specs/reduction/reduce-l1-4>` +* :doc:`ReduceL2 <../operation-specs/reduction/reduce-l2-4>` +* :doc:`ReduceLogicalAnd <../operation-specs/reduction/reduce-logical-and-1>` +* :doc:`ReduceLogicalOr <../operation-specs/reduction/reduce-logical-or-1>` +* :doc:`ReduceMax <../operation-specs/reduction/reduce-max-1>` +* :doc:`ReduceMean <../operation-specs/reduction/reduce-mean-1>` +* :doc:`ReduceMin <../operation-specs/reduction/reduce-min-1>` +* :doc:`ReduceProd <../operation-specs/reduction/reduce-prod-1>` +* :doc:`ReduceSum <../operation-specs/reduction/reduce-sum-1>` +* :doc:`RegionYolo <../operation-specs/detection/region-yolo-1>` +* :doc:`ReorgYolo <../operation-specs/detection/reorg-yolo-1>` +* :doc:`Reshape <../operation-specs/shape/reshape-1>` +* :doc:`Result <../operation-specs/infrastructure/result-1>` +* :doc:`ReverseSequence <../operation-specs/movement/reverse-sequence-1>` +* :doc:`RNNCell <../operation-specs/sequence/rnn-cell-3>` +* :doc:`RNNSequence <../operation-specs/sequence/rnn-sequence-5>` +* :doc:`ROIAlign <../operation-specs/detection/roi-align-9>` +* :doc:`ROIAlignRotated <../operation-specs/detection/roi-align-rotated-15>` +* :doc:`ROIPooling <../operation-specs/detection/roi-pooling-1>` +* :doc:`Roll <../operation-specs/movement/roll-7>` +* :doc:`Round <../operation-specs/arithmetic/round-5>` +* 
:doc:`ScaledDotProductAttention <../operation-specs/sequence/scaled-dot-product-attention>` +* :doc:`ScatterElementsUpdate <../operation-specs/movement/scatter-elements-update-12>` +* :doc:`ScatterNDUpdate <../operation-specs/movement/scatter-nd-update-15>` +* :doc:`ScatterUpdate <../operation-specs/movement/scatter-update-3>` +* :doc:`SearchSorted <../operation-specs/sort/search-sorted-15>` +* :doc:`Select <../operation-specs/condition/select-1>` +* :doc:`Selu <../operation-specs/activation/selu-1>` +* :doc:`ShapeOf <../operation-specs/shape/shape-of-3>` +* :doc:`ShuffleChannels <../operation-specs/movement/shuffle-channels-1>` +* :doc:`Sigmoid <../operation-specs/activation/sigmoid-1>` +* :doc:`Sign <../operation-specs/arithmetic/sign-1>` +* :doc:`Sin <../operation-specs/arithmetic/sin-1>` +* :doc:`Sinh <../operation-specs/arithmetic/sinh-1>` +* :doc:`Slice <../operation-specs/movement/slice-8>` +* :doc:`SliceScatter <../operation-specs/movement/slice-scatter-15>` +* :doc:`SoftMax <../operation-specs/activation/softmax-8>` +* :doc:`SoftPlus <../operation-specs/activation/softplus-4>` +* :doc:`SoftSign <../operation-specs/activation/softsign-9>` +* :doc:`SpaceToBatch <../operation-specs/movement/space-to-batch-2>` +* :doc:`SpaceToDepth <../operation-specs/movement/space-to-depth-1>` +* :doc:`Split <../operation-specs/movement/split-1>` +* :doc:`Sqrt <../operation-specs/arithmetic/sqrt-1>` +* :doc:`SquaredDifference <../operation-specs/arithmetic/squared-difference-1>` +* :doc:`Squeeze <../operation-specs/shape/squeeze-1>` +* :doc:`STFT <../operation-specs/signals/stft-15>` +* :doc:`StridedSlice <../operation-specs/movement/strided-slice-1>` +* :doc:`StringTensorPack <../operation-specs/type/string-tensor-pack-15>` +* :doc:`StringTensorUnpack <../operation-specs/type/string-tensor-unpack-15>` +* :doc:`Subtract <../operation-specs/arithmetic/subtract-1>` +* :doc:`Swish <../operation-specs/activation/swish-4>` +* :doc:`Tan <../operation-specs/arithmetic/tan-1>` +* :doc:`Tanh <../operation-specs/arithmetic/tanh-1>` +* :doc:`TensorIterator <../operation-specs/infrastructure/tensor-iterator-1>` +* :doc:`Tile <../operation-specs/movement/tile-1>` +* :doc:`TopK <../operation-specs/sort/top-k-11>` +* :doc:`Transpose <../operation-specs/movement/transpose-1>` +* :doc:`Unique <../operation-specs/movement/unique-10>` +* :doc:`Unsqueeze <../operation-specs/shape/unsqueeze-1>` +* :doc:`VariadicSplit <../operation-specs/movement/variadic-split-1>` diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs.rst index 7ac47116595621..8eccea47c31dd0 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs.rst @@ -29,8 +29,10 @@ Operation Specifications BatchToSpace-2 BinaryConvolution-1 BitwiseAnd-13 + BitwiseLeftShift-15 BitwiseNot-13 BitwiseOr-13 + BitwiseRightShift-15 BitwiseXor-13 Broadcast-1 Broadcast-3 @@ -39,6 +41,7 @@ Operation Specifications CTCGreedyDecoderSeqLen-6 Ceiling-1 Clamp-1 + Col2Im-15 Concat-1 Constant-1 ConvertLike-1 @@ -61,7 +64,9 @@ Operation Specifications Einsum-7 Elu-1 EmbeddingBagOffsetsSum-3 + EmbeddingBagOffsets-15 EmbeddingBagPackedSum-3 + EmbeddingBagPacked-15 EmbeddingSegmentsSum-3 Equal-1 Erf-1 @@ -189,6 +194,7 @@ Operation Specifications RNNSequence-5 ROIAlign-3 ROIAlign-9 + ROIAlignRotated-15 ROIPooling-1 Roll-7 Round-5 @@ 
-196,6 +202,7 @@ Operation Specifications ScatterElementsUpdate-3 ScatterElementsUpdate-12 ScatterNDUpdate-3 + ScatterNDUpdate-15 ScatterUpdate-3 SearchSorted-15 Select-1 @@ -208,6 +215,7 @@ Operation Specifications Sin-1 Sinh-1 Slice-8 + SliceScatter-15 SoftMax-1 SoftMax-8 SoftPlus-4 @@ -218,7 +226,10 @@ Operation Specifications Sqrt-1 SquaredDifference-1 Squeeze-1 + STFT-15 StridedSlice-1 + StringTensorPack-15 + StringTensorUnpack-15 Subtract-1 Swish-4 Tan-1 diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/signals/stft-15.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/signals/stft-15.rst index 4a41df7214317c..bcc420f5db25c9 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/signals/stft-15.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/signals/stft-15.rst @@ -14,14 +14,14 @@ Short Time Fourier Transformation for real-valued input (STFT) **Short description**: *STFT* operation performs Short-Time Fourier Transform (real-to-complex). -**Detailed description**: *STFT* performs Short-Time Fourier Transform of real-valued batched input tensor of shape ``[batch, signal_size]``, and produces complex result represented by separate values for real and imaginary part. +**Detailed description**: *STFT* performs Short-Time Fourier Transform of real-valued input tensor of shape ``[signal_size]`` or ``[batch, signal_size]``, and produces complex result represented by separate values for real and imaginary part. **Attributes**: -* *transform_frames* +* *transpose_frames* - * **Description**: Flag to set output shape layout. If true the ``frames`` dimension is at out_shape[2], otherwise it is at out_shape[1]. + * **Description**: Flag to set output shape layout. If true the ``frames`` dimension is at out_shape[-2], otherwise it is at out_shape[-3]. * **Range of values**: * ``false`` - do not transpose output shape @@ -31,25 +31,25 @@ Short Time Fourier Transformation for real-valued input (STFT) **Inputs** -* **1**: ``signal`` - Tensor of type *T* and 2D shape [batch, signal_size] with signal data for the STFT. **Required.** -* **2**: ``window`` - Tensor of type *T* and 1D shape [window_length], specifying the window values for the signal slice multiplication. **Required.** -* **3**: ``frame_size`` - Scalar tensor of type *T_INT* describing the size of a single frame of the signal to be provided as input to FFT. **Required.** -* **4**: ``frame_step`` - Scalar tensor of type *T_INT* describing The distance (number of samples) between successive frames. **Required.** +* **1**: ``signal`` - Tensor of type *T* and 1D shape [signal_size] or 2D shape [batch, signal_size] with signal data for the STFT. **Required.** +* **2**: ``window`` - Tensor of type *T* and 1D shape [window_length], specifying the window values for the signal slice multiplication. **Required.** +* **3**: ``frame_size`` - Scalar tensor of type *T_INT* describing the size of a single frame of the signal to be provided as input to FFT. **Required.** +* **4**: ``frame_step`` - Scalar tensor of type *T_INT* describing The distance (number of samples) between successive frames. 
**Required.** **Outputs** -* **1**: The result of STFT operation, tensor of the same type as input ``signal`` tensor and shape: +* **1**: The result of STFT operation, tensor of the same type as input ``signal`` tensor and shape: - + When ``transform_frames == false`` the output shape is ``[batch, frames, fft_results, 2]`` - + When ``transform_frames == true`` the output shape is ``[batch, fft_results, frames, 2]`` + * When ``transpose_frames == false`` the output shape is ``[frames, fft_results, 2]`` for 1D signal input or ``[batch, frames, fft_results, 2]`` for 2D signal input. + * When ``transpose_frames == true`` the output shape is ``[fft_results, frames, 2]`` for 1D signal input or ``[batch, fft_results, frames, 2]`` for 2D signal input. - where: + where: - + ``batch`` is a batch size dimension - + ``frames`` is a number calculated as ``(signal_shape[1] - frame_size) / frame_step) + 1`` - + ``fft_results`` is a number calculated as ``(frame_size / 2) + 1`` - + ``2`` is the last dimension is for complex value real and imaginary part + * ``batch`` is a batch size dimension + * ``frames`` is a number calculated as ``(signal_shape[-1] - frame_size) / frame_step) + 1`` + * ``fft_results`` is a number calculated as ``(frame_size / 2) + 1`` + * ``2`` is the last dimension is for complex value real and imaginary part **Types** @@ -59,27 +59,109 @@ Short Time Fourier Transformation for real-valued input (STFT) * *T_INT*: ``int64`` or ``int32``. -**Example**: +**Examples**: + +*Example 1D signal, transpose_frames=false:* .. code-block:: xml :force: + + 56 + + + 7 + + + + + + 16 + 6 2 - 48 + + + + + +*Example 1D signal, transpose_frames=true:* + +.. code-block:: xml + :force: + + + + + + 56 - 8 + 7 - - + + - + + 6 + 16 2 - 9 - 9 + + + + +*Example 2D signal, transpose_frames=false:* + +.. code-block:: xml + :force: + + + + + + 3 + 56 + + + 7 + + + + + + 3 + 16 + 6 + 2 + + + + + +*Example 2D signal, transpose_frames=true:* + +.. code-block:: xml + :force: + + + + + + 3 + 56 + + + 7 + + + + + + 3 + 6 + 16 2 diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sort/search-sorted-15.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sort/search-sorted-15.rst index 81c592d3341a35..7a623a1e16739c 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sort/search-sorted-15.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sort/search-sorted-15.rst @@ -17,29 +17,32 @@ SearchSorted **Attributes** -* *right* +* *right_mode* - * **Description**: If False, set the first suitable index. If True, return the last suitable index for given value. Default is False. - * **Range of values**: true or false - * **Type**: boolean + * **Description**: flag to control whether output would contain leftmost or rightmost indices for given values. + * **Range of values**: + + * *true* - return the rightmost (last) suitable index for given value. + * *false* - return the leftmost (first) suitable index for given value. + * **Type**: ``boolean`` * **Default value**: false * **Required**: *no* **Inputs**: -* **1**: ``sorted`` - ND input tensor of type *T* - cannot be a scalar, containing monotonically increasing sequence on the innermost dimension. **Required.** +* **1**: ``sorted_sequence`` - ND input tensor of type *T* - cannot be a scalar, containing monotonically increasing sequence on the innermost dimension. 
**Required.** * **2**: ``values`` - ND input tensor of type *T*, containing the search values. If sorted sequence is 1D, then the values can have any shape, otherwise the rank should be equal to the rank of sorted input. **Required.** **Outputs**: -* **1**: Tensor of type *TOut*, with the same shape as second input tensor, containing the indices. +* **1**: Tensor of type *T_IND*, with the same shape as second input tensor ``values``, containing the indices. **Types** * *T*: any supported floating-point and integer type. -* *TOut*: int64. +* *T_IND*: ``int64``. **Example** @@ -47,7 +50,7 @@ SearchSorted :force: - + 7 @@ -63,7 +66,7 @@ SearchSorted - + 7 256 200 diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst index f1fd002b48072e..ebd4667d544616 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst @@ -9,6 +9,7 @@ Run LLM Inference on OpenVINO with the GenAI Flavor :hidden: NPU inference of LLMs + genai-guide/genai-use-cases This guide will show you how to integrate the OpenVINO GenAI flavor into your application, covering @@ -174,59 +175,6 @@ You can also create your custom streamer for more sophisticated processing: pipe.generate("The Sun is yellow because", ov::genai::streamer(custom_streamer), ov::genai::max_new_tokens(100)); } -Using GenAI in Chat Scenario -################################ - -For chat scenarios where inputs and outputs represent a conversation, maintaining KVCache across inputs -may prove beneficial. The chat-specific methods **start_chat** and **finish_chat** are used to -mark a conversation session, as you can see in these simple examples: - -.. tab-set:: - - .. tab-item:: Python - :sync: py - - .. code-block:: python - - import openvino_genai as ov_genai - pipe = ov_genai.LLMPipeline(model_path) - - pipe.set_generation_config({'max_new_tokens': 100) - - pipe.start_chat() - while True: - print('question:') - prompt = input() - if prompt == 'Stop!': - break - print(pipe.generate(prompt)) - pipe.finish_chat() - - - .. tab-item:: C++ - :sync: cpp - - .. code-block:: cpp - - int main(int argc, char* argv[]) { - std::string prompt; - - std::string model_path = argv[1]; - ov::genai::LLMPipeline pipe(model_path, "CPU"); - - ov::genai::GenerationConfig config = pipe.get_generation_config(); - config.max_new_tokens = 100; - pipe.set_generation_config(config) - - pipe.start_chat(); - for (size_t i = 0; i < questions.size(); i++) { - std::cout << "question:\n"; - std::getline(std::cin, prompt); - - std::cout << pipe.generate(prompt) << std::endl; - } - pipe.finish_chat(); - } Optimizing Generation with Grouped Beam Search ####################################################### diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst new file mode 100644 index 00000000000000..953784c03fdef0 --- /dev/null +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst @@ -0,0 +1,433 @@ +GenAI Use Cases +===================== + +This article provides several use case scenarios for Generative AI model +inference. The applications presented in the code samples below +only require minimal configuration, like setting an inference device. Feel free +to explore and modify the source code as you need. 
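Every use case below follows the same pattern: pick a device, construct a pipeline for a local model folder, and call ``generate``. A minimal sketch of that shared pattern, with the model directory as a placeholder and ``max_new_tokens`` passed as a keyword the way the other GenAI samples configure generation:

```python
import openvino_genai

def run_minimal(model_dir: str, prompt: str):
    device = "CPU"  # GPU can be used as well
    pipe = openvino_genai.LLMPipeline(model_dir, device)
    # Generation parameters can also be set through a GenerationConfig object,
    # as the chat scenario sample later in this article does.
    return pipe.generate(prompt, max_new_tokens=100)

# print(run_minimal("./model_dir", "The Sun is yellow because"))  # model_dir is a placeholder
```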
+ + +Using GenAI for Text-to-Image Generation +######################################## + +Examples below demonstrate inference on text-to-image models, like Stable Diffusion +1.5, 2.1, and LCM, with a text prompt as input. The :ref:`main.cpp ` +sample shows basic usage of the ``Text2ImagePipeline`` pipeline. +:ref:`lora.cpp ` shows how to apply LoRA adapters to the pipeline. + + +.. tab-set:: + + .. tab-item:: Python + :sync: python + + .. tab-set:: + + .. tab-item:: main.py + :name: mainpy + + .. code-block:: python + + import openvino_genai + from PIL import Image + import numpy as np + + class Generator(openvino_genai.Generator): + def __init__(self, seed, mu=0.0, sigma=1.0): + openvino_genai.Generator.__init__(self) + np.random.seed(seed) + self.mu = mu + self.sigma = sigma + + def next(self): + return np.random.normal(self.mu, self.sigma) + + + def infer(model_dir: str, prompt: str): + device = 'CPU' # GPU can be used as well + random_generator = Generator(42) + pipe = openvino_genai.Text2ImagePipeline(model_dir, device) + image_tensor = pipe.generate( + prompt, + width=512, + height=512, + num_inference_steps=20, + num_images_per_prompt=1, + random_generator=random_generator + ) + + image = Image.fromarray(image_tensor.data[0]) + image.save("image.bmp") + + .. tab-item:: LoRA.py + :name: lorapy + + .. code-block:: python + + import openvino as ov + import openvino_genai + import numpy as np + import sys + + + class Generator(openvino_genai.Generator): + def __init__(self, seed, mu=0.0, sigma=1.0): + openvino_genai.Generator.__init__(self) + np.random.seed(seed) + self.mu = mu + self.sigma = sigma + + def next(self): + return np.random.normal(self.mu, self.sigma) + + + def image_write(path: str, image_tensor: ov.Tensor): + from PIL import Image + image = Image.fromarray(image_tensor.data[0]) + image.save(path) + + + def infer(models_path: str, prompt: str): + prompt = "cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting" + + device = "CPU" # GPU, NPU can be used as well + adapter_config = openvino_genai.AdapterConfig() + + for i in range(int(len(adapters) / 2)): + adapter = openvino_genai.Adapter(adapters[2 * i]) + alpha = float(adapters[2 * i + 1]) + adapter_config.add(adapter, alpha) + + pipe = openvino_genai.Text2ImagePipeline(models_path, device, adapters=adapter_config) + print("Generating image with LoRA adapters applied, resulting image will be in lora.bmp") + image = pipe.generate(prompt, + random_generator=Generator(42), + width=512, + height=896, + num_inference_steps=20) + + image_write("lora.bmp", image) + print("Generating image without LoRA adapters applied, resulting image will be in baseline.bmp") + image = pipe.generate(prompt, + adapters=openvino_genai.AdapterConfig(), + random_generator=Generator(42), + width=512, + height=896, + num_inference_steps=20 + ) + image_write("baseline.bmp", image) + + For more information, refer to the + `Python sample `__ + + .. tab-item:: C++ + :sync: cpp + + .. tab-set:: + + .. tab-item:: main.cpp + :name: maincpp + + .. 
code-block:: cpp + + #include "openvino/genai/text2image/pipeline.hpp" + + #include "imwrite.hpp" + + int32_t main(int32_t argc, char* argv[]) try { + OPENVINO_ASSERT(argc == 3, "Usage: ", argv[0], " ''"); + + const std::string models_path = argv[1], prompt = argv[2]; + const std::string device = "CPU"; // GPU, NPU can be used as well + + ov::genai::Text2ImagePipeline pipe(models_path, device); + ov::Tensor image = pipe.generate(prompt, + ov::genai::width(512), + ov::genai::height(512), + ov::genai::num_inference_steps(20), + ov::genai::num_images_per_prompt(1)); + + imwrite("image_%d.bmp", image, true); + + return EXIT_SUCCESS; + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } + + .. tab-item:: LoRA.cpp + :name: loracpp + + .. code-block:: cpp + + #include "openvino/genai/text2image/pipeline.hpp" + + #include "imwrite.hpp" + + int32_t main(int32_t argc, char* argv[]) try { + OPENVINO_ASSERT(argc >= 3 && (argc - 3) % 2 == 0, "Usage: ", argv[0], " '' [ ...]]"); + + const std::string models_path = argv[1], prompt = argv[2]; + const std::string device = "CPU"; // GPU, NPU can be used as well + + ov::genai::AdapterConfig adapter_config; + for(size_t i = 0; i < (argc - 3)/2; ++i) { + ov::genai::Adapter adapter(argv[3 + 2*i]); + float alpha = std::atof(argv[3 + 2*i + 1]); + adapter_config.add(adapter, alpha); + } + + ov::genai::Text2ImagePipeline pipe(models_path, device, ov::genai::adapters(adapter_config)); + + std::cout << "Generating image with LoRA adapters applied, resulting image will be in lora.bmp\n"; + ov::Tensor image = pipe.generate(prompt, + ov::genai::random_generator(std::make_shared(42)), + ov::genai::width(512), + ov::genai::height(896), + ov::genai::num_inference_steps(20)); + imwrite("lora.bmp", image, true); + + std::cout << "Generating image without LoRA adapters applied, resulting image will be in baseline.bmp\n"; + image = pipe.generate(prompt, + ov::genai::adapters(), + ov::genai::random_generator(std::make_shared(42)), + ov::genai::width(512), + ov::genai::height(896), + ov::genai::num_inference_steps(20)); + imwrite("baseline.bmp", image, true); + + return EXIT_SUCCESS; + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } + + + For more information, refer to the + `C++ sample `__ + + + + + +Using GenAI in Speech Recognition +################################# + + +The application, shown in code samples below, performs inference on speech +recognition Whisper Models. The samples include the ``WhisperPipeline`` class +and use audio files in WAV format at a sampling rate of 16 kHz as input. + +.. tab-set:: + + .. tab-item:: Python + :sync: cpp + + .. 
code-block:: python + + import openvino_genai + import librosa + + + def read_wav(filepath): + raw_speech, samplerate = librosa.load(filepath, sr=16000) + return raw_speech.tolist() + + + def infer(model_dir: str, wav_file_path: str): + raw_speech = read_wav(wav_file_path) + pipe = openvino_genai.WhisperPipeline(model_dir) + + def streamer(word: str) -> bool: + print(word, end="") + return False + + result = pipe.generate( + raw_speech, + max_new_tokens=100, + language="<|en|>", + task="transcribe", + return_timestamps=True, + streamer=streamer, + ) + + print() + for chunk in result.chunks: + print(f"timestamps: [{chunk.start_ts}, {chunk.end_ts}] text: {chunk.text}") + + + For more information, refer to the + `Python sample `__. + + .. tab-item:: C++ + :sync: cpp + + .. code-block:: cpp + + #include "audio_utils.hpp" + #include "openvino/genai/whisper_pipeline.hpp" + + int main(int argc, char* argv[]) try { + if (3 > argc) { + throw std::runtime_error(std::string{"Usage: "} + argv[0] + " \"\""); + } + + std::filesystem::path models_path = argv[1]; + std::string wav_file_path = argv[2]; + std::string device = "CPU"; // GPU can be used as well + + ov::genai::WhisperPipeline pipeline(models_path, device); + + ov::genai::RawSpeechInput raw_speech = utils::audio::read_wav(wav_file_path); + + ov::genai::WhisperGenerationConfig config(models_path / "generation_config.json"); + config.max_new_tokens = 100; + config.language = "<|en|>"; + config.task = "transcribe"; + config.return_timestamps = true; + + auto streamer = [](std::string word) { + std::cout << word; + return false; + }; + + auto result = pipeline.generate(raw_speech, config, streamer); + + std::cout << "\n"; + + for (auto& chunk : *result.chunks) { + std::cout << "timestamps: [" << chunk.start_ts << ", " << chunk.end_ts << "] text: " << chunk.text << "\n"; + } + + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) { + } + return EXIT_FAILURE; + } catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) { + } + return EXIT_FAILURE; + } + + + For more information, refer to the + `C++ sample `__. + + +Using GenAI in Chat Scenario +############################ + +For chat scenarios where inputs and outputs represent a conversation, maintaining KVCache across inputs +may prove beneficial. The ``start_chat`` and ``finish_chat`` chat-specific methods are used to +mark a conversation session, as shown in the samples below: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: python + + import openvino_genai + + + def streamer(subword): + print(subword, end='', flush=True) + return False + + + def infer(model_dir: str): + device = 'CPU' # GPU can be used as well. + pipe = openvino_genai.LLMPipeline(model_dir, device) + + config = openvino_genai.GenerationConfig() + config.max_new_tokens = 100 + + pipe.start_chat() + while True: + try: + prompt = input('question:\n') + except EOFError: + break + pipe.generate(prompt, config, streamer) + print('\n----------') + pipe.finish_chat() + + + + For more information, refer to the + `Python sample `__. + + .. tab-item:: C++ + :sync: cpp + + .. 
code-block:: cpp + + #include "openvino/genai/llm_pipeline.hpp" + + int main(int argc, char* argv[]) try { + if (2 != argc) { + throw std::runtime_error(std::string{"Usage: "} + argv[0] + " "); + } + std::string prompt; + std::string models_path = argv[1]; + + std::string device = "CPU"; // GPU, NPU can be used as well + ov::genai::LLMPipeline pipe(models_path, device); + + ov::genai::GenerationConfig config; + config.max_new_tokens = 100; + std::function streamer = [](std::string word) { + std::cout << word << std::flush; + return false; + }; + + pipe.start_chat(); + std::cout << "question:\n"; + while (std::getline(std::cin, prompt)) { + pipe.generate(prompt, config, streamer); + std::cout << "\n----------\n" + "question:\n"; + } + pipe.finish_chat(); + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } + + + For more information, refer to the + `C++ sample `__ + +Additional Resources +##################### + +* :doc:`Install OpenVINO GenAI <../../../get-started/install-openvino/install-openvino-genai>` +* `OpenVINO GenAI Repo `__ +* `OpenVINO GenAI Samples `__ +* `OpenVINO Tokenizers `__ diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-hf.rst b/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-hf.rst index 77cd0aca62021d..a26b670b5314d0 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-hf.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-hf.rst @@ -301,6 +301,19 @@ model to avoid extra computation. This is how it can be done for LLMs: Now the model can be converted to OpenVINO using Optimum Intel Python API or CLI interfaces mentioned above. +Execution on CPU device +########################## + +As mentioned in the :ref:`Composability of different threading runtimes ` section, OpenVINO's default threading runtime, +oneTBB, keeps CPU cores active for a while after inference is done. When using Optimum Intel Python API, +it calls Torch (via HF transformers) for postprocessing, such as beam search or gready search. +Torch uses OpenMP for threading, OpenMP needs to wait for CPU cores that are kept active by +oneTBB. By default, OpenMP uses the `busy-wait `__ which can delay the next OpenVINO inference as well. + +It is recommended to: + +* Limit the number of CPU threads used by Torch with `torch.set_num_threads `__. +* Set the environment variable `OMP_WAIT_POLICY `__ to `PASSIVE`, which disables OpenMP `busy-wait `__. Additional Resources ##################### diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device/performance-hint-and-thread-scheduling.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device/performance-hint-and-thread-scheduling.rst index dc158fe9352042..a440f77bc79984 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device/performance-hint-and-thread-scheduling.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device/performance-hint-and-thread-scheduling.rst @@ -187,3 +187,23 @@ are executed in parallel. 
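Following up on the ``Execution on CPU device`` guidance above, the two recommendations (cap the number of Torch threads and switch OpenMP to a passive wait policy) can be sketched together as below. This assumes a causal LM already exported for Optimum Intel and uses a placeholder model path; note that ``OMP_WAIT_POLICY`` must be set before the OpenMP runtime starts, so exporting it in the shell before launching Python is the more reliable option.

```python
import os
os.environ.setdefault("OMP_WAIT_POLICY", "PASSIVE")  # must be set before OpenMP initializes

import torch
torch.set_num_threads(4)  # limit the OpenMP threads Torch uses for postprocessing

from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer

model_id = "path/to/exported/model"  # placeholder
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = OVModelForCausalLM.from_pretrained(model_id)

inputs = tokenizer("The Sun is yellow because", return_tensors="pt")
output_ids = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```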
For details on multi-stream execution check the :doc:`optimization guide <../../optimize-inference/optimizing-throughput/advanced_throughput_options>`. + +.. _Composability_of_different_threading_runtimes: + +Composability of different threading runtimes +############################################# + +OpenVINO is by default built with the `oneTBB `__ threading library. +oneTBB has a `worker_wait` feature, similar to `OpenMP `__ `busy-wait `__, which makes OpenVINO inference +threads wait actively for a while after a task is done. The intention is to avoid CPU inactivity in the +transition time between inference tasks. + +In a pipeline that runs OpenVINO inferences on the CPU along with other sequential application logic, using different threading runtimes (e.g., OpenVINO inferences use oneTBB, +while other application logic uses OpenMP) will cause both to occupy CPU cores for additional time after the task is done, leading to overhead. + +Recommended solutions: + +- The most effective way is to use oneTBB for all computations made in the pipeline. +- Rebuild OpenVINO with OpenMP if other application logic uses OpenMP. +- Limit the number of threads for OpenVINO and the other parts, and let the OS do the scheduling. +- If other application logic uses OpenMP, set the environment variable `OMP_WAIT_POLICY `__ to `PASSIVE` to disable OpenMP `busy-wait `__. diff --git a/docs/dev/build_riscv64.md b/docs/dev/build_riscv64.md index 13ab9e98d56958..19d6e8714b45fc 100644 --- a/docs/dev/build_riscv64.md +++ b/docs/dev/build_riscv64.md @@ -16,15 +16,23 @@ The software was validated on the following devices: - Python 3.10 for OpenVINO Runtime Python API ## How to build +Currently, there are three ways to build OpenVINO Runtime for 64-bit RISC-V platforms: + +1. **Recommended**. The build with vectorized (using RVV instructions) primitives for a limited scope of operations from [`SHL`](https://github.com/XUANTIE-RV/csi-nn2) using [`xuantie-gnu-toolchain`](https://github.com/XUANTIE-RV/). This GNU Compiler Toolchain supports RVV 0.7.1, ratified RVV 1.0 and Xuantie-specific instruction sets. The vector intrinsics don't use the common prefix `__riscv_`. This method provides the best performance available at the moment. +2. The build without optimized primitives using [`riscv-gnu-toolchain`](https://github.com/riscv-collab/riscv-gnu-toolchain.git). This GNU Compiler Toolchain supports RVV 0.7.1 and ratified RVV 1.0. The vector intrinsics use the common prefix `__riscv_`. However, as mentioned earlier, this build method doesn't yet provide optimized primitives implemented using the RVV intrinsics. +3. The build without optimized primitives using installed Linux packages. The compilers in these packages don't support RVV intrinsics. + +### Steps + 0. Prerequisite: -- For target with RVV - build `xuantie-gnu-toolchain` and `qemu`: +- For target with vectorized primitives from `SHL` - build `xuantie-gnu-toolchain` and `qemu`: ```sh git clone https://github.com/XUANTIE-RV/xuantie-gnu-toolchain.git cd xuantie-gnu-toolchain ./configure --prefix= make linux build-qemu -j$(nproc) ``` -- For target without RVV - build `riscv-gnu-toolchain`: +- For target without optimized primitives using `riscv-gnu-toolchain`: ```sh git clone https://github.com/riscv-collab/riscv-gnu-toolchain.git cd riscv-gnu-toolchain @@ -32,6 +40,11 @@ The software was validated on the following devices: make linux build-qemu -j$(nproc) ``` > **NOTE**: The `build-qemu` target is optional, as it is used to build the `qemu` simulator.
However, it is recommended to build the `qemu` simulator, since it is much more convenient to validate the software on your host than on your devices. More information can be seen [here](https://github.com/riscv-collab/riscv-gnu-toolchain). +- For target without optimized primitives using installed Linux packages: + ```sh + apt-get update + apt-get install -y gcc-riscv64-linux-gnu g++-riscv64-linux-gnu binutils-riscv64-linux-gnu + ``` 1. Clone OpenVINO repository and init submodules: ```sh @@ -50,8 +63,8 @@ The software was validated on the following devices: mkdir build && cd build ``` -4. To cross compile OpenVINO Runtime for RISC-V devices, run `cmake` with specified `CMAKE_TOOLCHAIN_FILE` and `RISCV_TOOLCHAIN_ROOT`. -- For target with RVV: +4. To cross compile OpenVINO Runtime for RISC-V devices, run `cmake` with specified `CMAKE_TOOLCHAIN_FILE` and `RISCV_TOOLCHAIN_ROOT` (the latter is needed only when building with a GNU toolchain). +- For target with vectorized primitives from `SHL`: ```sh cmake .. \ -DCMAKE_BUILD_TYPE=Release \ @@ -59,8 +72,8 @@ The software was validated on the following devices: -DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/ \ -DRISCV_TOOLCHAIN_ROOT= ``` - > **NOTE**: To build OpenVINO Runtime for different versions of RVV, you just need to specify corresponding toolchain files. For exmaple, you can replace `` with `riscv64-071-thead-gnu.toolchain.cmake` for RVV 0.7.1 and `riscv64-100-thead-gnu.toolchain.cmake` for RVV 1.0 respectively. -- For target without RVV: + > **NOTE**: To build OpenVINO Runtime for different versions of RVV, you just need to specify corresponding toolchain files. For example, you can replace `` with `riscv64-071-xuantie-gnu.toolchain.cmake` for RVV 0.7.1 and `riscv64-100-xuantie-gnu.toolchain.cmake` for RVV 1.0 respectively. +- For target without optimized primitives using `riscv-gnu-toolchain`: ```sh cmake .. \ -DCMAKE_BUILD_TYPE=Release \ @@ -69,7 +82,13 @@ The software was validated on the following devices: -DRISCV_TOOLCHAIN_ROOT=/opt/riscv ``` > **NOTE**: The `riscv-gnu-toolchain` is build as there are essential files used for cross compilation under `/opt/riscv/sysroot`. The latest stable versions of Clang or GCC both support compiling source code into RISC-V instructions, so it is acceptable to choose your preferable compilers by specifying `-DCMAKE_C_COMPILER` and `CMAKE_CXX_COMPILER`. But remember to add the key `-DCMAKE_SYSROOT=/opt/riscv/sysroot`, otherwise many fundamental headers and libs could not be found during cross compilation. - +- For target without optimized primitives using installed Linux packages: + ```sh + cmake .. \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX= \ + -DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/riscv64.linux.toolchain.cmake + ``` > **NOTE**: By default OpenVINO is built with OpenMP support on RISC-V devices. Then run `make` to build the project: diff --git a/docs/dev/build_windows.md b/docs/dev/build_windows.md index 10049485202cca..4a9761f5364046 100644 --- a/docs/dev/build_windows.md +++ b/docs/dev/build_windows.md @@ -25,7 +25,7 @@ Supported configurations: ```sh git clone https://github.com/openvinotoolkit/openvino.git cd openvino - git submodule update --init + git submodule update --init --recursive ``` 2.
Create build directory: diff --git a/docs/dev/ci/commit_signoff_policy.md b/docs/dev/ci/commit_signoff_policy.md new file mode 100644 index 00000000000000..0328d3c3ec308c --- /dev/null +++ b/docs/dev/ci/commit_signoff_policy.md @@ -0,0 +1,74 @@ +# How to sign-off commits + +We require a sign-off message in the following format on each commit in a pull request. + +``` +This is a commit message. + +Signed-off-by: Author Name +``` + +## How to sign-off new commits + +In a local Git environment, the sign-off message can be added to a commit either manually (as text) +or via the **-s** flag used with the “git commit” command, for example: + +`git commit -s -m "My commit message"` + +To avoid manually adding the flag for each commit, we recommend setting up a Git hook with the following steps: + +1. Navigate to the `/.git/hooks` folder. +2. Open the `prepare-commit-msg.sample` file and replace its content with the following: + +``` +#!/bin/sh + +COMMIT_MSG_FILE=$1 +COMMIT_SOURCE=$2 +SHA1=$3 + +NAME=$(git config user.name) +EMAIL=$(git config user.email) + +if [ -z "$NAME" ]; then + echo "empty git config user.name" + exit 1 +fi + +if [ -z "$EMAIL" ]; then + echo "empty git config user.email" + exit 1 +fi + +git interpret-trailers --if-exists doNothing --trailer \ + "Signed-off-by: $NAME <$EMAIL>" \ + --in-place "$1" +``` + +3. Save the file with the name `prepare-commit-msg` (remove the .sample extension). +4. Make the file executable (on Linux / Git Bash: `chmod +x /.git/hooks/prepare-commit-msg`). + +**Note**: For both sign-off approaches, ensure your user name and email address are configured in Git first: + +``` +git config user.name 'FIRST_NAME LAST_NAME' +git config user.email 'MY_EMAIL@example.com' +``` + +### Sign-off web-based commits + +To enable automatic sign-off of commits made via the GitHub web interface, make sure that the +[Require contributors to sign off on web-based commits](https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/managing-repository-settings/managing-the-commit-signoff-policy-for-your-repository#enabling-or-disabling-compulsory-commit-signoffs-for-your-repository) +setting is selected in the Settings menu of your OpenVINO repository fork. + +## How to sign-off older commits in the history + +If you forget to add the sign-off to your last commit, you can amend it and force-push to GitHub: + +``` +git commit --amend --signoff +``` + +To sign off on even older commits, use an interactive rebase, edit unsigned commits, and execute +`git commit --amend --signoff` for each. However, please note that if others have already started working based on +the commits in this branch, this will rewrite history and may cause issues for collaborators. diff --git a/docs/documentation_build_instructions.md b/docs/documentation_build_instructions.md index d9219454b86a19..a1412cfc3f358d 100644 --- a/docs/documentation_build_instructions.md +++ b/docs/documentation_build_instructions.md @@ -26,7 +26,7 @@ $ source env/bin/activate ``` 5. Install the sphinx theme ``` -(env) $ cd docs/openvino_sphinx_theme && python setup.py install && cd - +(env) $ python -m pip install docs/openvino_sphinx_theme `````` 6.
Install the custom sphinx sitemap ``` diff --git a/docs/nbdoc/consts.py b/docs/nbdoc/consts.py index c6965d054f0991..75c8111bafcc6b 100644 --- a/docs/nbdoc/consts.py +++ b/docs/nbdoc/consts.py @@ -6,7 +6,7 @@ repo_owner = "openvinotoolkit" repo_name = "openvino_notebooks" repo_branch = "tree/main" -artifacts_link = "http://repository.toolbox.iotg.sclab.intel.com/projects/ov-notebook/0.1.0-latest/20241007220823/dist/rst_files/" +artifacts_link = "http://repository.toolbox.iotg.sclab.intel.com/projects/ov-notebook/0.1.0-latest/20241022220806/dist/rst_files/" blacklisted_extensions = ['.xml', '.bin'] notebooks_repo = "https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/" notebooks_binder = "https://mybinder.org/v2/gh/openvinotoolkit/openvino_notebooks/HEAD?filepath=" diff --git a/docs/notebooks/3D-pose-estimation-with-output.rst b/docs/notebooks/3D-pose-estimation-with-output.rst index cdb1e4ff5083cd..e5e8e3813b5173 100644 --- a/docs/notebooks/3D-pose-estimation-with-output.rst +++ b/docs/notebooks/3D-pose-estimation-with-output.rst @@ -11,14 +11,14 @@ of this notebook, you will see live inference results from your webcam out the algorithms. **Make sure you have properly installed the**\ `Jupyter extension `__\ **and -been using JupyterLab to run the demo as suggested in the** -``README.md`` +been using JupyterLab to run the demo as suggested in the +``README.md``** **NOTE**: *To use a webcam, you must run this Jupyter notebook on a computer with a webcam. If you run on a remote server, the webcam will not work. However, you can still do inference on a video file in - the final step. This demo utilizes the Python interface in* - ``Three.js`` *integrated with WebGL to process data from the model + the final step. This demo utilizes the Python interface in + ``Three.js`` integrated with WebGL to process data from the model inference. These results are processed and displayed in the notebook.* @@ -87,7 +87,7 @@ Prerequisites -**The** ``pythreejs`` **extension may not display properly when using a +**The ``pythreejs`` extension may not display properly when using a Jupyter Notebook release. 
Therefore, it is recommended to use Jupyter Lab instead.** @@ -109,61 +109,61 @@ Lab instead.** Using cached https://download.pytorch.org/whl/cpu/torch-2.4.1%2Bcpu-cp38-cp38-linux_x86_64.whl (194.9 MB) Collecting onnx<1.16.2 Using cached onnx-1.16.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB) - Requirement already satisfied: ipywidgets>=7.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (8.1.5) + Requirement already satisfied: ipywidgets>=7.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (8.1.5) Collecting ipydatawidgets>=1.1.1 (from pythreejs) Using cached ipydatawidgets-4.3.5-py2.py3-none-any.whl.metadata (1.4 kB) Collecting numpy (from pythreejs) Using cached numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB) - Requirement already satisfied: traitlets in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (5.14.3) - Requirement already satisfied: defusedxml>=0.7.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (0.7.1) + Requirement already satisfied: traitlets in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (5.14.3) + Requirement already satisfied: defusedxml>=0.7.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (0.7.1) Collecting networkx<=3.1.0 (from openvino-dev>=2024.0.0) Using cached networkx-3.1-py3-none-any.whl.metadata (5.3 kB) Collecting openvino-telemetry>=2023.2.1 (from openvino-dev>=2024.0.0) Using cached openvino_telemetry-2024.1.0-py3-none-any.whl.metadata (2.3 kB) - Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (24.1) - Requirement already satisfied: pyyaml>=5.4.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (6.0.2) - Requirement already satisfied: requests>=2.25.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (2.32.0) + Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (24.1) + Requirement already satisfied: pyyaml>=5.4.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (6.0.2) + Requirement already satisfied: requests>=2.25.1 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (2.32.0) Collecting openvino==2024.4.0 (from openvino-dev>=2024.0.0) Using cached openvino-2024.4.0-16579-cp38-cp38-manylinux2014_x86_64.whl.metadata (8.3 kB) Collecting filelock (from torch) Using cached filelock-3.16.1-py3-none-any.whl.metadata (2.9 kB) - Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (4.12.2) + Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (4.12.2) Collecting sympy (from torch) Using cached sympy-1.13.3-py3-none-any.whl.metadata (12 kB) - Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1.4) + Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1.4) Collecting fsspec (from torch) - Using cached fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB) + Using cached fsspec-2024.10.0-py3-none-any.whl.metadata (11 kB) Collecting protobuf>=3.20.2 (from onnx<1.16.2) Using cached protobuf-5.28.2-cp38-abi3-manylinux2014_x86_64.whl.metadata (592 bytes) Collecting traittypes>=0.2.0 (from ipydatawidgets>=1.1.1->pythreejs) Using cached traittypes-0.2.1-py2.py3-none-any.whl.metadata (1.0 kB) - Requirement already satisfied: comm>=0.1.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (0.2.2) - Requirement already satisfied: ipython>=6.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (8.12.3) - Requirement already satisfied: widgetsnbextension~=4.0.12 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (4.0.13) - Requirement already satisfied: jupyterlab-widgets~=3.0.12 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (3.0.13) - Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (3.3.2) - Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (3.10) - Requirement already satisfied: urllib3<3,>=1.21.1 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (2.2.3) - Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (2024.8.30) - Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch) (2.1.5) + Requirement already satisfied: comm>=0.1.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (0.2.2) + Requirement already satisfied: ipython>=6.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (8.12.3) + Requirement already satisfied: widgetsnbextension~=4.0.12 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (4.0.13) + Requirement already satisfied: jupyterlab-widgets~=3.0.12 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (3.0.13) + Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (3.4.0) + Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (3.10) + Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (2.2.3) + Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (2024.8.30) + Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch) (2.1.5) Collecting mpmath<1.4,>=1.1.0 (from sympy->torch) Using cached https://download.pytorch.org/whl/mpmath-1.3.0-py3-none-any.whl (536 kB) - Requirement already satisfied: backcall in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.0) - Requirement already satisfied: decorator in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (5.1.1) - Requirement already satisfied: jedi>=0.16 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.19.1) - Requirement already satisfied: matplotlib-inline in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.1.7) - Requirement already satisfied: pickleshare in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.5) - Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (3.0.48) - Requirement already satisfied: pygments>=2.4.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.18.0) - Requirement already satisfied: stack-data in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.6.3) - Requirement already satisfied: pexpect>4.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (4.9.0) - Requirement already satisfied: parso<0.9.0,>=0.8.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.8.4) - Requirement already satisfied: ptyprocess>=0.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pexpect>4.3->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.0) - Requirement already satisfied: wcwidth in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.13) - Requirement already satisfied: executing>=1.2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.1.0) - Requirement already satisfied: asttokens>=2.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.4.1) - Requirement already satisfied: pure-eval in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.3) - Requirement already satisfied: six>=1.12.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from asttokens>=2.1.0->stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (1.16.0) + Requirement already satisfied: backcall in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.0) + Requirement already satisfied: decorator in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (5.1.1) + Requirement already satisfied: jedi>=0.16 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.19.1) + Requirement already satisfied: matplotlib-inline in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.1.7) + Requirement already satisfied: pickleshare in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.5) + Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (3.0.48) + Requirement already satisfied: pygments>=2.4.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.18.0) + Requirement already satisfied: stack-data in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.6.3) + Requirement already satisfied: pexpect>4.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (4.9.0) + Requirement already satisfied: parso<0.9.0,>=0.8.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.8.4) + Requirement already satisfied: ptyprocess>=0.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pexpect>4.3->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.0) + Requirement already satisfied: wcwidth in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.13) + Requirement already satisfied: executing>=1.2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.1.0) + Requirement already satisfied: asttokens>=2.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.4.1) + Requirement already satisfied: pure-eval in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.3) + Requirement already satisfied: six>=1.12.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from asttokens>=2.1.0->stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (1.16.0) Using cached pythreejs-2.4.2-py3-none-any.whl (3.4 MB) Using cached openvino_dev-2024.4.0-16579-py3-none-any.whl (4.7 MB) Using cached openvino-2024.4.0-16579-cp38-cp38-manylinux2014_x86_64.whl (42.6 MB) @@ -175,11 +175,11 @@ Lab instead.** Using cached openvino_telemetry-2024.1.0-py3-none-any.whl (23 kB) Using cached protobuf-5.28.2-cp38-abi3-manylinux2014_x86_64.whl (316 kB) Using cached filelock-3.16.1-py3-none-any.whl (16 kB) - Using cached fsspec-2024.9.0-py3-none-any.whl (179 kB) + Using cached fsspec-2024.10.0-py3-none-any.whl (179 kB) Using cached sympy-1.13.3-py3-none-any.whl (6.2 MB) Using cached traittypes-0.2.1-py2.py3-none-any.whl (8.6 kB) Installing collected packages: openvino-telemetry, mpmath, traittypes, sympy, protobuf, numpy, networkx, fsspec, filelock, torch, openvino, opencv-python, onnx, openvino-dev, ipydatawidgets, pythreejs - Successfully installed filelock-3.16.1 fsspec-2024.9.0 ipydatawidgets-4.3.5 mpmath-1.3.0 networkx-3.1 numpy-1.24.4 onnx-1.16.1 opencv-python-4.10.0.84 openvino-2024.4.0 openvino-dev-2024.4.0 openvino-telemetry-2024.1.0 protobuf-5.28.2 pythreejs-2.4.2 sympy-1.13.3 torch-2.4.1+cpu traittypes-0.2.1 + Successfully installed filelock-3.16.1 fsspec-2024.10.0 ipydatawidgets-4.3.5 mpmath-1.3.0 networkx-3.1 numpy-1.24.4 onnx-1.16.1 opencv-python-4.10.0.84 openvino-2024.4.0 openvino-dev-2024.4.0 openvino-telemetry-2024.1.0 protobuf-5.28.2 pythreejs-2.4.2 sympy-1.13.3 torch-2.4.1+cpu traittypes-0.2.1 Note: you may need to restart the kernel to use updated packages. 
@@ -193,28 +193,28 @@ Imports import collections import time from pathlib import Path - + import cv2 import ipywidgets as widgets import numpy as np from IPython.display import clear_output, display import openvino as ov - + # Fetch `notebook_utils` module import requests - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) with open("notebook_utils.py", "w") as f: f.write(r.text) - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/engine3js.py", ) with open("engine3js.py", "w") as f: f.write(r.text) - + import notebook_utils as utils import engine3js as engine @@ -236,19 +236,19 @@ directory structure and downloads the selected model. # directory where model will be downloaded base_model_dir = "model" - + # model name as named in Open Model Zoo model_name = "human-pose-estimation-3d-0001" # selected precision (FP32, FP16) precision = "FP32" - + BASE_MODEL_NAME = f"{base_model_dir}/public/{model_name}/{model_name}" model_path = Path(BASE_MODEL_NAME).with_suffix(".pth") onnx_path = Path(BASE_MODEL_NAME).with_suffix(".onnx") - + ir_model_path = Path(f"model/public/{model_name}/{precision}/{model_name}.xml") model_weights_path = Path(f"model/public/{model_name}/{precision}/{model_name}.bin") - + if not model_path.exists(): download_command = f"omz_downloader " f"--name {model_name} " f"--output_dir {base_model_dir}" ! $download_command @@ -257,12 +257,12 @@ directory structure and downloads the selected model. .. parsed-literal:: ################|| Downloading human-pose-estimation-3d-0001 ||################ - + ========== Downloading model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.tar.gz - - + + ========== Unpacking model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.tar.gz - + Convert Model to OpenVINO IR format @@ -287,22 +287,22 @@ IR format. .. parsed-literal:: ========== Converting human-pose-estimation-3d-0001 to ONNX - Conversion to ONNX command: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/omz_tools/internal_scripts/pytorch_to_onnx.py --model-path=model/public/human-pose-estimation-3d-0001 --model-name=PoseEstimationWithMobileNet --model-param=is_convertible_by_mo=True --import-module=model --weights=model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.pth --input-shape=1,3,256,448 --input-names=data --output-names=features,heatmaps,pafs --output-file=model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.onnx - - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/omz_tools/internal_scripts/pytorch_to_onnx.py:147: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. 
Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + Conversion to ONNX command: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/omz_tools/internal_scripts/pytorch_to_onnx.py --model-path=model/public/human-pose-estimation-3d-0001 --model-name=PoseEstimationWithMobileNet --model-param=is_convertible_by_mo=True --import-module=model --weights=model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.pth --input-shape=1,3,256,448 --input-names=data --output-names=features,heatmaps,pafs --output-file=model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.onnx + + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/omz_tools/internal_scripts/pytorch_to_onnx.py:147: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. model.load_state_dict(torch.load(weights, map_location='cpu')) ONNX check passed successfully. 
- + ========== Converting human-pose-estimation-3d-0001 to IR (FP32) - Conversion command: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/bin/mo --framework=onnx --output_dir=model/public/human-pose-estimation-3d-0001/FP32 --model_name=human-pose-estimation-3d-0001 --input=data '--mean_values=data[128.0,128.0,128.0]' '--scale_values=data[255.0,255.0,255.0]' --output=features,heatmaps,pafs --input_model=model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.onnx '--layout=data(NCHW)' '--input_shape=[1, 3, 256, 448]' --compress_to_fp16=False - + Conversion command: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/bin/mo --framework=onnx --output_dir=model/public/human-pose-estimation-3d-0001/FP32 --model_name=human-pose-estimation-3d-0001 --input=data '--mean_values=data[128.0,128.0,128.0]' '--scale_values=data[255.0,255.0,255.0]' --output=features,heatmaps,pafs --input_model=model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.onnx '--layout=data(NCHW)' '--input_shape=[1, 3, 256, 448]' --compress_to_fp16=False + [ INFO ] MO command line tool is considered as the legacy conversion API as of OpenVINO 2023.2 release. - In 2025.0 MO command line tool and openvino.tools.mo.convert_model() will be removed. Please use OpenVINO Model Converter (OVC) or openvino.convert_model(). OVC represents a lightweight alternative of MO and provides simplified model conversion API. + In 2025.0 MO command line tool and openvino.tools.mo.convert_model() will be removed. Please use OpenVINO Model Converter (OVC) or openvino.convert_model(). OVC represents a lightweight alternative of MO and provides simplified model conversion API. Find more information about transition from MO to OVC at https://docs.openvino.ai/2023.2/openvino_docs_OV_Converter_UG_prepare_model_convert_model_MO_OVC_transition.html [ SUCCESS ] Generated IR version 11 model. - [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/3D-pose-estimation-webcam/model/public/human-pose-estimation-3d-0001/FP32/human-pose-estimation-3d-0001.xml - [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/3D-pose-estimation-webcam/model/public/human-pose-estimation-3d-0001/FP32/human-pose-estimation-3d-0001.bin - + [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/3D-pose-estimation-webcam/model/public/human-pose-estimation-3d-0001/FP32/human-pose-estimation-3d-0001.xml + [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/3D-pose-estimation-webcam/model/public/human-pose-estimation-3d-0001/FP32/human-pose-estimation-3d-0001.bin + Select inference device @@ -315,7 +315,7 @@ select device from dropdown list for running inference using OpenVINO .. 
code:: ipython3 device = utils.device_widget() - + device @@ -350,7 +350,7 @@ created to infer the compiled model. compiled_model = core.compile_model(model=model, device_name=device.value) infer_request = compiled_model.create_infer_request() input_tensor_name = model.inputs[0].get_any_name() - + # get input and output names of nodes input_layer = compiled_model.input(0) output_layers = list(compiled_model.outputs) @@ -390,25 +390,25 @@ input for the 3D model. This is how you obtain the output heat maps, PAF def model_infer(scaled_img, stride): """ Run model inference on the input image - + Parameters: scaled_img: resized image according to the input size of the model stride: int, the stride of the window """ - + # Remove excess space from the picture img = scaled_img[ 0 : scaled_img.shape[0] - (scaled_img.shape[0] % stride), 0 : scaled_img.shape[1] - (scaled_img.shape[1] % stride), ] - + img = np.transpose(img, (2, 0, 1))[None,] infer_request.infer({input_tensor_name: img}) # A set of three inference results is obtained results = {name: infer_request.get_tensor(name).data[:] for name in {"features", "heatmaps", "pafs"}} # Get the results results = (results["features"][0], results["heatmaps"][0], results["pafs"][0]) - + return results Draw 2D Pose Overlays @@ -448,8 +448,8 @@ from Open Model Zoo. [13, 14], # neck - r_hip - r_knee - r_ankle ] ) - - + + body_edges_2d = np.array( [ [0, 1], # neck - nose @@ -471,25 +471,25 @@ from Open Model Zoo. [13, 14], # neck - r_hip - r_knee - r_ankle ] ) - - + + def draw_poses(frame, poses_2d, scaled_img, use_popup): """ Draw 2D pose overlays on the image to visualize estimated poses. Joints are drawn as circles and limbs are drawn as lines. - + :param frame: the input image :param poses_2d: array of human joint pairs """ for pose in poses_2d: pose = np.array(pose[0:-1]).reshape((-1, 3)).transpose() was_found = pose[2] > 0 - + pose[0], pose[1] = ( pose[0] * frame.shape[1] / scaled_img.shape[1], pose[1] * frame.shape[0] / scaled_img.shape[0], ) - + # Draw joints. for edge in body_edges_2d: if was_found[edge[0]] and was_found[edge[1]]: @@ -512,7 +512,7 @@ from Open Model Zoo. -1, cv2.LINE_AA, ) - + return frame Main Processing Function @@ -529,18 +529,18 @@ webcam feed or a video file. """ 2D image as input, using OpenVINO as inference backend, get joints 3D coordinates, and draw 3D human skeleton in the scene - + :param source: The webcam number to feed the video stream with primary webcam set to "0", or the video path. :param flip: To be used by VideoPlayer function for flipping capture image. :param use_popup: False for showing encoded frames over this notebook, True for creating a popup window. :param skip_frames: Number of frames to skip at the beginning of the video. """ - + focal_length = -1 # default stride = 8 player = None skeleton_set = None - + try: # create video player to play with target fps video_path # get the frame from camera @@ -548,16 +548,16 @@ webcam feed or a video file. 
player = utils.VideoPlayer(source, flip=flip, fps=30, skip_first_frames=skip_frames) # start capturing player.start() - + input_image = player.next() # set the window size resize_scale = 450 / input_image.shape[1] windows_width = int(input_image.shape[1] * resize_scale) windows_height = int(input_image.shape[0] * resize_scale) - + # use visualization library engine3D = engine.Engine3js(grid=True, axis=True, view_width=windows_width, view_height=windows_height) - + if use_popup: # display the 3D human pose in this notebook, and origin frame in popup window display(engine3D.renderer) @@ -567,43 +567,43 @@ webcam feed or a video file. # set the 2D image box, show both human pose and image in the notebook imgbox = widgets.Image(format="jpg", height=windows_height, width=windows_width) display(widgets.HBox([engine3D.renderer, imgbox])) - + skeleton = engine.Skeleton(body_edges=body_edges) - + processing_times = collections.deque() - + while True: # grab the frame frame = player.next() if frame is None: print("Source ended") break - + # resize image and change dims to fit neural network input # (see https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/human-pose-estimation-3d-0001) scaled_img = cv2.resize(frame, dsize=(model.inputs[0].shape[3], model.inputs[0].shape[2])) - + if focal_length < 0: # Focal length is unknown focal_length = np.float32(0.8 * scaled_img.shape[1]) - + # inference start start_time = time.time() # get results inference_result = model_infer(scaled_img, stride) - + # inference stop stop_time = time.time() processing_times.append(stop_time - start_time) # Process the point to point coordinates of the data poses_3d, poses_2d = engine.parse_poses(inference_result, 1, stride, focal_length, True) - + # use processing times from last 200 frames if len(processing_times) > 200: processing_times.popleft() - + processing_time = np.mean(processing_times) * 1000 fps = 1000 / processing_time - + if len(poses_3d) > 0: # From here, you can rotate the 3D point positions using the function "draw_poses", # or you can directly make the correct mapping below to properly display the object image on the screen @@ -616,28 +616,28 @@ webcam feed or a video file. -y + np.ones(poses_3d[:, 2::4].shape) * 100, -x, ) - + poses_3d = poses_3d.reshape(poses_3d.shape[0], 19, -1)[:, :, 0:3] people = skeleton(poses_3d=poses_3d) - + try: engine3D.scene_remove(skeleton_set) except Exception: pass - + engine3D.scene_add(people) skeleton_set = people - + # draw 2D frame = draw_poses(frame, poses_2d, scaled_img, use_popup) - + else: try: engine3D.scene_remove(skeleton_set) skeleton_set = None except Exception: pass - + cv2.putText( frame, f"Inference time: {processing_time:.1f}ms ({fps:.1f} FPS)", @@ -648,7 +648,7 @@ webcam feed or a video file. 1, cv2.LINE_AA, ) - + if use_popup: cv2.imshow(title, frame) key = cv2.waitKey(1) @@ -662,9 +662,9 @@ webcam feed or a video file. frame, params=[cv2.IMWRITE_JPEG_QUALITY, 90], )[1].tobytes() - + engine3D.renderer.render(engine3D.scene, engine3D.cam) - + except KeyboardInterrupt: print("Interrupted") except RuntimeError as e: @@ -711,10 +711,10 @@ picture on the left to interact. .. 
code:: ipython3 USE_WEBCAM = False - + cam_id = 0 video_path = "https://storage.openvinotoolkit.org/data/test_data/videos/face-demographics-walking.mp4" - + source = cam_id if USE_WEBCAM else video_path - + run_pose_estimation(source=source, flip=isinstance(source, int), use_popup=False) diff --git a/docs/notebooks/3D-segmentation-point-clouds-with-output.rst b/docs/notebooks/3D-segmentation-point-clouds-with-output.rst index ce27c3006ac36a..14107e6290f1c7 100644 --- a/docs/notebooks/3D-segmentation-point-clouds-with-output.rst +++ b/docs/notebooks/3D-segmentation-point-clouds-with-output.rst @@ -46,20 +46,12 @@ Guide =2023.1.0" "tqdm" - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "openvino>=2023.1.0" "tqdm" "matplotlib>=3.4" .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. Imports @@ -227,7 +219,7 @@ chair for example. .. parsed-literal:: - /tmp/ipykernel_59833/2434168836.py:12: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored + /tmp/ipykernel_2578096/2434168836.py:12: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored ax.scatter3D(X, Y, Z, s=5, cmap="jet", marker="o", label="chair") @@ -321,7 +313,7 @@ select device from dropdown list for running inference using OpenVINO .. parsed-literal:: - /tmp/ipykernel_59833/2804603389.py:23: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored + /tmp/ipykernel_2578096/2804603389.py:23: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored ax.scatter(XCur, YCur, ZCur, s=5, cmap="jet", marker="o", label=classes[i]) diff --git a/docs/notebooks/action-recognition-webcam-with-output.rst b/docs/notebooks/action-recognition-webcam-with-output.rst index 6f9ff9a062ace7..a074549965ea7c 100644 --- a/docs/notebooks/action-recognition-webcam-with-output.rst +++ b/docs/notebooks/action-recognition-webcam-with-output.rst @@ -683,11 +683,16 @@ multi-camera systems). run_action_recognition(source=source, use_popup=False, **additional_options) +.. parsed-literal:: -.. image:: action-recognition-webcam-with-output_files/action-recognition-webcam-with-output_22_0.png + Cannot open https://archive.org/serve/ISSVideoResourceLifeOnStation720p/ISS%20Video%20Resource_LifeOnStation_720p.mp4 .. 
parsed-literal:: - Source ended + [ WARN:0@6.113] global cap.cpp:164 open VIDEOIO(CV_IMAGES): raised OpenCV exception: + + OpenCV(4.10.0) /io/opencv/modules/videoio/src/cap_images.cpp:244: error: (-5:Bad argument) CAP_IMAGES: error, expected '0?[1-9][du]' pattern, got: https://archive.org/serve/ISSVideoResourceLifeOnStation720p/ISS%20Video%20Resource_LifeOnStation_720p.mp4 in function 'icvExtractPattern' + + diff --git a/docs/notebooks/action-recognition-webcam-with-output_files/action-recognition-webcam-with-output_22_0.png b/docs/notebooks/action-recognition-webcam-with-output_files/action-recognition-webcam-with-output_22_0.png deleted file mode 100644 index 2bb95f87f90515..00000000000000 --- a/docs/notebooks/action-recognition-webcam-with-output_files/action-recognition-webcam-with-output_22_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d9ed14955798d0fbcdc284da02f19a12ae92c89fe6c1f4760951a414f4047b66 -size 68016 diff --git a/docs/notebooks/all_notebooks_paths.txt b/docs/notebooks/all_notebooks_paths.txt index 62e4b205f45f75..9a907132b29c75 100644 --- a/docs/notebooks/all_notebooks_paths.txt +++ b/docs/notebooks/all_notebooks_paths.txt @@ -54,11 +54,12 @@ notebooks/language-quantize-bert/language-quantize-bert.ipynb notebooks/latent-consistency-models-image-generation/latent-consistency-models-image-generation.ipynb notebooks/latent-consistency-models-image-generation/latent-consistency-models-optimum-demo.ipynb notebooks/latent-consistency-models-image-generation/lcm-lora-controlnet.ipynb -notebooks/llava-multimodal-chatbot/llava-multimodal-chatbot.ipynb -notebooks/llava-multimodal-chatbot/videollava-multimodal-chatbot.ipynb +notebooks/llava-multimodal-chatbot/llava-multimodal-chatbot-genai.ipynb +notebooks/llava-multimodal-chatbot/llava-multimodal-chatbot-optimum.ipynb notebooks/llava-next-multimodal-chatbot/llava-next-multimodal-chatbot.ipynb notebooks/llm-agent-functioncall/llm-agent-functioncall-qwen.ipynb notebooks/llm-agent-react/llm-agent-rag-llamaindex.ipynb +notebooks/llm-agent-react/llm-agent-react.ipynb notebooks/llm-agent-react/llm-agent-react-langchain.ipynb notebooks/llm-chatbot/llm-chatbot-generate-api.ipynb notebooks/llm-chatbot/llm-chatbot.ipynb @@ -148,7 +149,6 @@ notebooks/tflite-selfie-segmentation/tflite-selfie-segmentation.ipynb notebooks/tflite-to-openvino/tflite-to-openvino.ipynb notebooks/tiny-sd-image-generation/tiny-sd-image-generation.ipynb notebooks/torchvision-zoo-to-openvino/convnext-classification.ipynb -notebooks/triposr-3d-reconstruction/triposr-3d-reconstruction.ipynb notebooks/typo-detector/typo-detector.ipynb notebooks/vehicle-detection-and-recognition/vehicle-detection-and-recognition.ipynb notebooks/vision-background-removal/vision-background-removal.ipynb diff --git a/docs/notebooks/amused-lightweight-text-to-image-with-output.rst b/docs/notebooks/amused-lightweight-text-to-image-with-output.rst index 880bf539025c5e..23e564e135d9ae 100644 --- a/docs/notebooks/amused-lightweight-text-to-image-with-output.rst +++ b/docs/notebooks/amused-lightweight-text-to-image-with-output.rst @@ -226,23 +226,23 @@ Convert the Text Encoder .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4779: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. 
Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4779: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:88: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:88: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if input_shape[-1] > 1 or self.sliding_window is not None: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:808: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:808: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
encoder_states = () if output_hidden_states else None - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:813: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:813: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if output_hidden_states: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:836: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:836: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if output_hidden_states: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:839: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:839: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if not return_dict: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:935: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:935: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if not return_dict: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:1426: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:1426: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if not return_dict: @@ -351,13 +351,13 @@ suitable. This function repeats part of ``AmusedPipeline``. .. parsed-literal:: - /tmp/ipykernel_60662/3779428577.py:34: TracerWarning: Converting a tensor to a Python list might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /tmp/ipykernel_2578393/3779428577.py:34: TracerWarning: Converting a tensor to a Python list might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! shape=shape.tolist(), - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/autoencoders/vq_model.py:144: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/autoencoders/vq_model.py:144: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if not force_not_quantize: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:147: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if hidden_states.shape[0] >= 64: @@ -490,7 +490,7 @@ And insert wrappers instances in the pipeline: .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -707,7 +707,7 @@ model. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. 
deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -760,17 +760,17 @@ model. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) @@ -794,7 +794,7 @@ Demo generation with quantized pipeline .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -878,7 +878,7 @@ a rough estimate of generation quality. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/utilities/prints.py:43: UserWarning: Metric `InceptionScore` will save all extracted features in buffer. For large datasets this may lead to large memory footprint. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/utilities/prints.py:43: UserWarning: Metric `InceptionScore` will save all extracted features in buffer. For large datasets this may lead to large memory footprint. warnings.warn(\*args, \*\*kwargs) # noqa: B028 @@ -890,7 +890,7 @@ a rough estimate of generation quality. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/image/inception.py:175: UserWarning: std(): degrees of freedom is <= 0. Correction should be strictly less than the reduction factor (input numel divided by output numel). (Triggered internally at ../aten/src/ATen/native/ReduceOps.cpp:1808.) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/image/inception.py:175: UserWarning: std(): degrees of freedom is <= 0. Correction should be strictly less than the reduction factor (input numel divided by output numel). (Triggered internally at ../aten/src/ATen/native/ReduceOps.cpp:1808.) return kl.mean(), kl.std() @@ -907,8 +907,8 @@ a rough estimate of generation quality. .. parsed-literal:: - Quantized pipeline Inception Score: 11.073053359985352 - Quantization speed-up: 2.07x + Quantized pipeline Inception Score: 11.0730562210083 + Quantization speed-up: 2.09x Interactive inference diff --git a/docs/notebooks/animate-anyone-with-output.rst b/docs/notebooks/animate-anyone-with-output.rst index 15459596dea5bf..a7debae86cef3f 100644 --- a/docs/notebooks/animate-anyone-with-output.rst +++ b/docs/notebooks/animate-anyone-with-output.rst @@ -1,7 +1,8 @@ Image-to-Video synthesis with AnimateAnyone and OpenVINO ======================================================== -|image0| +.. 
image:: https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/animate-anyone/animate-anyone.gif + `AnimateAnyone `__ tackles the task of generating animation sequences from a single character image. It @@ -36,9 +37,7 @@ repo `__ and .. warning:: - - This tutorial requires at least **96 GB** of RAM for model conversion and **40 GB** for inference. Changing the values ``HEIGHT``, ``WIDTH`` and ``VIDEO_LENGTH`` variables will change the memory consumption but will also affect accuracy. - + This tutorial requires at least **96 GB** of RAM for model conversion and **40 GB** for inference. Changing the values of ``HEIGHT`` ``WIDTH`` and ``VIDEO_LENGTH`` variables will change the memory consumption but will also affect accuracy. **Table of contents:** @@ -71,9 +70,6 @@ need a Jupyter server to start. For details, please refer to `Installation Guide `__. -.. |image0| image:: https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/animate-anyone/animate-anyone.gif - - Prerequisites ------------- @@ -104,12 +100,6 @@ Prerequisites %load_ext skip_kernel_extension - -.. parsed-literal:: - - Note: you may need to restart the kernel to use updated packages. - - Note that we clone a fork of original repo with tweaked forward methods. .. code:: ipython3 @@ -164,11 +154,11 @@ Note that we clone a fork of original repo with tweaked forward methods. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead. + /home/itrushkin/.virtualenvs/test/lib/python3.10/site-packages/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead. + /home/itrushkin/.virtualenvs/test/lib/python3.10/site-packages/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead. + /home/itrushkin/.virtualenvs/test/lib/python3.10/site-packages/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( @@ -192,7 +182,7 @@ Note that we clone a fork of original repo with tweaked forward methods. .. parsed-literal:: - INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino + INFO:nncf:NNCF initialized successfully. 
Supported frameworks detected: torch, openvino Prepare base model @@ -216,13 +206,6 @@ Prepare base model local_dir=local_dir, ) - - -.. parsed-literal:: - - diffusion_pytorch_model.bin: 0%| | 0.00/3.44G [00:00:2: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. - :6: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. - :9: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. - - Convert model to OpenVINO IR ---------------------------- @@ -430,7 +324,7 @@ semantic features are extracted through the CLIP image encoder for Cross-Attention. Temporal-Attention operates in the temporal dimension. Finally, the VAE decoder decodes the result into a video clip. -|image01| +.. image:: https://humanaigc.github.io/animate-anyone/static/images/f2_img.png The pipeline contains 6 PyTorch modules: @@ -470,8 +364,6 @@ compression parameters. More details about weights compression can be found in `OpenVINO documentation `__. -.. |image01| image:: https://humanaigc.github.io/animate-anyone/static/images/f2_img.png - .. 
code:: ipython3 %%skip not $SHOULD_CONVERT @@ -529,12 +421,14 @@ of the pipeline, it will be better to convert them to separate models. .. parsed-literal:: + WARNING:nncf:NNCF provides best results with torch==2.1.2, while current torch version is 2.2.2+cpu. If you encounter issues, consider switching to torch==2.1.2 INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 100% (32 / 32) │ 100% (32 / 32) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + +--------------+---------------------------+-----------------------------------+ + | Num bits (N) | % all parameters (layers) | % ratio-defining parameters | + | | | (layers) | + +==============+===========================+===================================+ + | 8 | 100% (32 / 32) | 100% (32 / 32) | + +--------------+---------------------------+-----------------------------------+ @@ -550,6 +444,14 @@ of the pipeline, it will be better to convert them to separate models. + + + + + + + + .. code:: ipython3 %%skip not $SHOULD_CONVERT @@ -575,11 +477,12 @@ of the pipeline, it will be better to convert them to separate models. .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 100% (40 / 40) │ 100% (40 / 40) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + +--------------+---------------------------+-----------------------------------+ + | Num bits (N) | % all parameters (layers) | % ratio-defining parameters | + | | | (layers) | + +==============+===========================+===================================+ + | 8 | 100% (40 / 40) | 100% (40 / 40) | + +--------------+---------------------------+-----------------------------------+ @@ -595,6 +498,14 @@ of the pipeline, it will be better to convert them to separate models. + + + + + + + + Reference UNet ~~~~~~~~~~~~~~ @@ -641,11 +552,12 @@ step. .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 100% (270 / 270) │ 100% (270 / 270) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + +--------------+---------------------------+-----------------------------------+ + | Num bits (N) | % all parameters (layers) | % ratio-defining parameters | + | | | (layers) | + +==============+===========================+===================================+ + | 8 | 100% (270 / 270) | 100% (270 / 270) | + +--------------+---------------------------+-----------------------------------+ @@ -661,6 +573,14 @@ step. + + + + + + + + Denoising UNet ~~~~~~~~~~~~~~ @@ -734,11 +654,12 @@ step. .. 
parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 100% (534 / 534) │ 100% (534 / 534) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + +--------------+---------------------------+-----------------------------------+ + | Num bits (N) | % all parameters (layers) | % ratio-defining parameters | + | | | (layers) | + +==============+===========================+===================================+ + | 8 | 100% (534 / 534) | 100% (534 / 534) | + +--------------+---------------------------+-----------------------------------+ @@ -754,6 +675,14 @@ step. + + + + + + + + Pose Guider ~~~~~~~~~~~ @@ -780,11 +709,12 @@ efficiently integrate pose control signals into the denoising process. .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 100% (8 / 8) │ 100% (8 / 8) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + +--------------+---------------------------+-----------------------------------+ + | Num bits (N) | % all parameters (layers) | % ratio-defining parameters | + | | | (layers) | + +==============+===========================+===================================+ + | 8 | 100% (8 / 8) | 100% (8 / 8) | + +--------------+---------------------------+-----------------------------------+ @@ -800,6 +730,14 @@ efficiently integrate pose control signals into the denoising process. + + + + + + + + Image Encoder ~~~~~~~~~~~~~ @@ -825,18 +763,19 @@ required for both reference and denoising UNets. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4779: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /home/itrushkin/.virtualenvs/test/lib/python3.10/site-packages/transformers/modeling_utils.py:4225: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( .. 
parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 100% (146 / 146) │ 100% (146 / 146) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + +--------------+---------------------------+-----------------------------------+ + | Num bits (N) | % all parameters (layers) | % ratio-defining parameters | + | | | (layers) | + +==============+===========================+===================================+ + | 8 | 100% (146 / 146) | 100% (146 / 146) | + +--------------+---------------------------+-----------------------------------+ @@ -852,6 +791,14 @@ required for both reference and denoising UNets. + + + + + + + + Inference --------- @@ -877,6 +824,15 @@ For starting work, please select inference device from dropdown list. device = device_widget() + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=5, options=('CPU', 'GPU.0', 'GPU.1', 'GPU.2', 'GPU.3', 'AUTO'), value='A… + + + .. code:: ipython3 class OVPose2VideoPipeline(Pose2VideoPipeline): @@ -1174,7 +1130,7 @@ Video post-processing .. raw:: html @@ -1248,23 +1204,9 @@ Interactive inference demo = make_demo(fn=generate) try: - demo.queue().launch(debug=False) + demo.queue().launch(debug=True) except Exception: - demo.queue().launch(debug=False, share=True) + demo.queue().launch(debug=True, share=True) # if you are launching remotely, specify server_name and server_port # demo.launch(server_name='your server name', server_port='server port in int') # Read more in the docs: https://gradio.app/docs/" - - -.. parsed-literal:: - - Running on local URL: http://127.0.0.1:7860 - - To create a public link, set `share=True` in `launch()`. - - - - - - - diff --git a/docs/notebooks/async-api-with-output.rst b/docs/notebooks/async-api-with-output.rst index 135554acc38de4..e8f5b80d429d81 100644 --- a/docs/notebooks/async-api-with-output.rst +++ b/docs/notebooks/async-api-with-output.rst @@ -58,21 +58,14 @@ Imports .. code:: ipython3 - import platform - %pip install -q "openvino>=2023.1.0" - %pip install -q opencv-python - if platform.system() != "windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q opencv-python "matplotlib>=3.4" .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. .. code:: ipython3 @@ -355,7 +348,7 @@ Test performance in Sync Mode .. parsed-literal:: Source ended - average throuput in sync mode: 63.95 fps + average throuput in sync mode: 63.79 fps Async Mode @@ -494,7 +487,7 @@ Test the performance in Async Mode .. parsed-literal:: Source ended - average throuput in async mode: 108.46 fps + average throuput in async mode: 106.83 fps Compare the performance @@ -637,5 +630,5 @@ Test the performance with ``AsyncInferQueue`` .. 
parsed-literal:: - average throughput in async mode with async infer queue: 144.01 fps + average throughput in async mode with async infer queue: 145.36 fps diff --git a/docs/notebooks/async-api-with-output_files/async-api-with-output_23_0.png b/docs/notebooks/async-api-with-output_files/async-api-with-output_23_0.png index f80b64476e19ea..f3492582efc67f 100644 --- a/docs/notebooks/async-api-with-output_files/async-api-with-output_23_0.png +++ b/docs/notebooks/async-api-with-output_files/async-api-with-output_23_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b164e09df4e90dc87d63caf35cc832021fbd147354a5300605164fce212e36b8 -size 29453 +oid sha256:84a9b6aab4a04feb319b3243644da6837b3b894122657a8f6639fa604e3b48dd +size 29468 diff --git a/docs/notebooks/auto-device-with-output.rst b/docs/notebooks/auto-device-with-output.rst index 5f7d8dfc61502f..28d894eb72b22e 100644 --- a/docs/notebooks/auto-device-with-output.rst +++ b/docs/notebooks/auto-device-with-output.rst @@ -82,18 +82,15 @@ Import modules and create Core import platform # Install required packages - %pip install -q "openvino>=2023.1.0" "numpy<2" Pillow torch torchvision tqdm --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "openvino>=2023.1.0" "matplotlib>=3.4" Pillow torch torchvision tqdm --extra-index-url https://download.pytorch.org/whl/cpu - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + if platform.system() == "Darwin": + %pip install -q "numpy<2.0.0" .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. .. code:: ipython3 @@ -200,16 +197,16 @@ By default, ``compile_model`` API will select **AUTO** as .. parsed-literal:: - [23:32:04.6480]I[plugin.cpp:421][AUTO] device:CPU, config:LOG_LEVEL=LOG_INFO - [23:32:04.6480]I[plugin.cpp:421][AUTO] device:CPU, config:PERFORMANCE_HINT=LATENCY - [23:32:04.6480]I[plugin.cpp:421][AUTO] device:CPU, config:PERFORMANCE_HINT_NUM_REQUESTS=0 - [23:32:04.6480]I[plugin.cpp:421][AUTO] device:CPU, config:PERF_COUNT=NO - [23:32:04.6480]I[plugin.cpp:426][AUTO] device:CPU, priority:0 - [23:32:04.6481]I[schedule.cpp:17][AUTO] scheduler starting - [23:32:04.6481]I[auto_schedule.cpp:181][AUTO] select device:CPU - [23:32:04.7787]I[auto_schedule.cpp:346][AUTO] Device: [CPU]: Compile model took 130.622171 ms - [23:32:04.7789]I[auto_schedule.cpp:112][AUTO] device:CPU compiling model finished - [23:32:04.7790]I[plugin.cpp:454][AUTO] underlying hardware does not support hardware context + [22:37:15.4888]I[plugin.cpp:421][AUTO] device:CPU, config:LOG_LEVEL=LOG_INFO + [22:37:15.4888]I[plugin.cpp:421][AUTO] device:CPU, config:PERFORMANCE_HINT=LATENCY + [22:37:15.4888]I[plugin.cpp:421][AUTO] device:CPU, config:PERFORMANCE_HINT_NUM_REQUESTS=0 + [22:37:15.4888]I[plugin.cpp:421][AUTO] device:CPU, config:PERF_COUNT=NO + [22:37:15.4888]I[plugin.cpp:426][AUTO] device:CPU, priority:0 + [22:37:15.4888]I[schedule.cpp:17][AUTO] scheduler starting + [22:37:15.4888]I[auto_schedule.cpp:181][AUTO] select device:CPU + [22:37:15.5995]I[auto_schedule.cpp:346][AUTO] Device: [CPU]: Compile model took 110.638049 ms + [22:37:15.5996]I[auto_schedule.cpp:112][AUTO] device:CPU compiling model finished + [22:37:15.5997]I[plugin.cpp:454][AUTO] underlying hardware does not support hardware context Successfully compiled model without a device_name. 
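Editorial aside for readers skimming this hunk outside the rendered notebook: the log lines above come from compiling a model without naming a device, which falls back to AUTO and, on this runner, schedules onto CPU. A minimal sketch of the same idea, assuming an IR file named ``model.xml`` is available (the file name and the latency hint are illustrative additions, not taken from the notebook):

.. code:: ipython3

    import openvino as ov

    core = ov.Core()
    model = core.read_model("model.xml")  # hypothetical IR path, not the notebook's file

    # Omitting device_name is equivalent to selecting AUTO, as the log above shows;
    # passing "AUTO" explicitly also lets us attach a performance hint.
    compiled_model = core.compile_model(model, "AUTO", {"PERFORMANCE_HINT": "LATENCY"})

    # AUTO reports which physical device it ultimately scheduled on
    # (CPU in the run captured above, since no GPU was available).
    print(compiled_model.get_property("EXECUTION_DEVICES"))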
@@ -222,8 +219,8 @@ By default, ``compile_model`` API will select **AUTO** as .. parsed-literal:: - Deleted compiled_model - [23:32:04.7847]I[schedule.cpp:308][AUTO] scheduler ending + Deleted compiled_model[22:37:15.6060]I[schedule.cpp:308][AUTO] scheduler ending + Explicitly pass AUTO as device_name to Core::compile_model API @@ -381,7 +378,7 @@ executed on CPU until GPU is ready. .. parsed-literal:: - Time to load model using AUTO device and get first inference: 0.13 seconds. + Time to load model using AUTO device and get first inference: 0.15 seconds. .. code:: ipython3 @@ -556,12 +553,12 @@ Loop for inference and update the FPS/Latency every Compiling Model for AUTO device with THROUGHPUT hint Start inference, 6 groups of FPS/latency will be measured over 10s intervals - throughput: 183.87fps, latency: 31.26ms, time interval: 10.01s - throughput: 184.60fps, latency: 31.70ms, time interval: 10.00s - throughput: 183.24fps, latency: 31.93ms, time interval: 10.01s - throughput: 184.05fps, latency: 31.75ms, time interval: 10.00s - throughput: 184.40fps, latency: 31.77ms, time interval: 10.00s - throughput: 178.41fps, latency: 32.83ms, time interval: 10.02s + throughput: 184.25fps, latency: 31.12ms, time interval: 10.02s + throughput: 184.19fps, latency: 31.80ms, time interval: 10.00s + throughput: 183.00fps, latency: 32.00ms, time interval: 10.01s + throughput: 183.37fps, latency: 31.91ms, time interval: 10.01s + throughput: 178.30fps, latency: 32.90ms, time interval: 10.01s + throughput: 182.80fps, latency: 32.08ms, time interval: 10.01s Done @@ -607,12 +604,12 @@ Loop for inference and update the FPS/Latency for each Compiling Model for AUTO Device with LATENCY hint Start inference, 6 groups fps/latency will be out with 10s interval - throughput: 140.52fps, latency: 6.62ms, time interval: 10.01s - throughput: 142.84fps, latency: 6.60ms, time interval: 10.00s - throughput: 142.14fps, latency: 6.60ms, time interval: 10.00s - throughput: 142.63fps, latency: 6.60ms, time interval: 10.00s - throughput: 143.11fps, latency: 6.61ms, time interval: 10.01s - throughput: 132.99fps, latency: 7.13ms, time interval: 10.01s + throughput: 139.34fps, latency: 6.64ms, time interval: 10.00s + throughput: 141.45fps, latency: 6.63ms, time interval: 10.00s + throughput: 141.42fps, latency: 6.63ms, time interval: 10.01s + throughput: 141.70fps, latency: 6.62ms, time interval: 10.01s + throughput: 130.57fps, latency: 7.22ms, time interval: 10.00s + throughput: 141.61fps, latency: 6.62ms, time interval: 10.01s Done diff --git a/docs/notebooks/auto-device-with-output_files/auto-device-with-output_27_0.png b/docs/notebooks/auto-device-with-output_files/auto-device-with-output_27_0.png index 5ef6531526d989..af14e6c0ac24c7 100644 --- a/docs/notebooks/auto-device-with-output_files/auto-device-with-output_27_0.png +++ b/docs/notebooks/auto-device-with-output_files/auto-device-with-output_27_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4cf9ec5f2d8e34510d31d10190c4d7f269bb83a2800891ff865e09ee85e80d95 -size 27103 +oid sha256:05bc2541cede3c32f2f9acff3f93d19572aab80a5bb7ac3e6c77bb397817bb54 +size 25899 diff --git a/docs/notebooks/auto-device-with-output_files/auto-device-with-output_28_0.png b/docs/notebooks/auto-device-with-output_files/auto-device-with-output_28_0.png index edc7de70aeb565..e12bb8a48eaa0e 100644 --- a/docs/notebooks/auto-device-with-output_files/auto-device-with-output_28_0.png +++ b/docs/notebooks/auto-device-with-output_files/auto-device-with-output_28_0.png @@ -1,3 +1,3 
@@ version https://git-lfs.github.com/spec/v1 -oid sha256:ed265008638a6c38fbfc8397e6bb20d0e1fc900df893f30b7238c518d2380b2b -size 40093 +oid sha256:b1e2e56c28c18b5d31607518b846e78c58e76b2b59a9ee083d02e0d8f8ec2b52 +size 40077 diff --git a/docs/notebooks/clip-zero-shot-classification-with-output.rst b/docs/notebooks/clip-zero-shot-classification-with-output.rst index 2e5a45826eaceb..3da831e6d9d0dd 100644 --- a/docs/notebooks/clip-zero-shot-classification-with-output.rst +++ b/docs/notebooks/clip-zero-shot-classification-with-output.rst @@ -112,14 +112,7 @@ tokenizer and preparing the images. .. code:: ipython3 - import platform - - %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "gradio>=4.19" "openvino>=2023.1.0" "transformers[torch]>=4.30" "datasets" "nncf>=2.6.0" "torch>=2.1" Pillow - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "gradio>=4.19" "matplotlib>=3.4" "openvino>=2023.1.0" "transformers[torch]>=4.30" "datasets" "nncf>=2.6.0" "torch>=2.1" Pillow import requests @@ -736,6 +729,7 @@ up of the dynamic quantized models. Interactive demo ---------------- + Now, it is your turn! You can provide your own image and comma-separated list of labels for zero-shot classification. diff --git a/docs/notebooks/controlnet-stable-diffusion-with-output.rst b/docs/notebooks/controlnet-stable-diffusion-with-output.rst index a922fe445a7897..3ab43d897ea658 100644 --- a/docs/notebooks/controlnet-stable-diffusion-with-output.rst +++ b/docs/notebooks/controlnet-stable-diffusion-with-output.rst @@ -198,7 +198,7 @@ Prerequisites .. code:: ipython3 %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "torch>=2.1" "torchvision" - %pip install -q "diffusers>=0.14.0" "transformers>=4.30.2" "controlnet-aux>=0.0.6" "gradio>=3.36" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "diffusers>=0.14.0" "matplotlib>=3.4" "transformers>=4.30.2" "controlnet-aux>=0.0.6" "gradio>=3.36" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q "openvino>=2023.1.0" "datasets>=2.14.6" "nncf>=2.7.0" import requests diff --git a/docs/notebooks/convert-to-openvino-with-output.rst b/docs/notebooks/convert-to-openvino-with-output.rst index e5a66d4f74ae17..8ec32adef0e04a 100644 --- a/docs/notebooks/convert-to-openvino-with-output.rst +++ b/docs/notebooks/convert-to-openvino-with-output.rst @@ -39,15 +39,12 @@ Guide =2024.0.0" "requests" "tqdm" "transformers>=4.31" "onnx<1.16.2" "torch>=2.1" "torchvision" "tensorflow_hub" "tensorflow" + "openvino>=2024.4.0" "requests" "tqdm" "transformers>=4.31" "onnx!=1.16.2" "torch>=2.1" "torchvision" "tensorflow_hub" "tensorflow" .. parsed-literal:: - Requirement already satisfied: pip in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (24.2) - Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. @@ -192,11 +189,11 @@ NLP model from Hugging Face and export it in ONNX format: .. parsed-literal:: - 2024-10-07 23:35:08.862114: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
- 2024-10-07 23:35:08.907564: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-22 22:40:21.522113: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-22 22:40:21.555890: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-07 23:35:09.444317: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/distilbert/modeling_distilbert.py:215: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + 2024-10-22 22:40:22.084160: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/distilbert/modeling_distilbert.py:215: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. mask, torch.tensor(torch.finfo(scores.dtype).min) @@ -673,7 +670,7 @@ frameworks conversion guides. .. parsed-literal:: - 2024-10-07 23:35:26.468759: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform. + 2024-10-22 22:40:40.138322: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform. Skipping registering GPU devices... @@ -724,28 +721,12 @@ Resnet50 model that was exported to the ONNX format: prep.input("input.1").model().set_layout(ov.Layout("nchw")) ov_model = prep.build() -.. code:: ipython3 +.. code:: python - # Legacy Model Optimizer API - from openvino.tools import mo - - ov_model = mo.convert_model(ONNX_CV_MODEL_PATH, layout="nchw") - - -.. parsed-literal:: - - [ INFO ] MO command line tool is considered as the legacy conversion API as of OpenVINO 2023.2 release. - In 2025.0 MO command line tool and openvino.tools.mo.convert_model() will be removed. 
Please use OpenVINO Model Converter (OVC) or openvino.convert_model(). OVC represents a lightweight alternative of MO and provides simplified model conversion API. - Find more information about transition from MO to OVC at https://docs.openvino.ai/2023.2/openvino_docs_OV_Converter_UG_prepare_model_convert_model_MO_OVC_transition.html - - -.. parsed-literal:: - - huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... - To disable this warning, you can either: - - Avoid using `tokenizers` before the fork if possible - - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) + # Legacy Model Optimizer API + from openvino.tools import mo + ov_model = mo.convert_model(ONNX_CV_MODEL_PATH, layout="nchw") Changing Model Layout ^^^^^^^^^^^^^^^^^^^^^ @@ -772,26 +753,17 @@ and the layout of an original model: prep.input("input.1").model().set_layout(ov.Layout("nchw")) ov_model = prep.build() -.. code:: ipython3 - - # Legacy Model Optimizer API - from openvino.tools import mo - - ov_model = mo.convert_model(ONNX_CV_MODEL_PATH, layout="nchw->nhwc") - - # alternatively use source_layout and target_layout parameters - ov_model = mo.convert_model(ONNX_CV_MODEL_PATH, source_layout="nchw", target_layout="nhwc") +Legacy Model Optimizer API +========================== +.. code:: python -.. parsed-literal:: + from openvino.tools import mo - [ INFO ] MO command line tool is considered as the legacy conversion API as of OpenVINO 2023.2 release. - In 2025.0 MO command line tool and openvino.tools.mo.convert_model() will be removed. Please use OpenVINO Model Converter (OVC) or openvino.convert_model(). OVC represents a lightweight alternative of MO and provides simplified model conversion API. - Find more information about transition from MO to OVC at https://docs.openvino.ai/2023.2/openvino_docs_OV_Converter_UG_prepare_model_convert_model_MO_OVC_transition.html - [ INFO ] MO command line tool is considered as the legacy conversion API as of OpenVINO 2023.2 release. - In 2025.0 MO command line tool and openvino.tools.mo.convert_model() will be removed. Please use OpenVINO Model Converter (OVC) or openvino.convert_model(). OVC represents a lightweight alternative of MO and provides simplified model conversion API. - Find more information about transition from MO to OVC at https://docs.openvino.ai/2023.2/openvino_docs_OV_Converter_UG_prepare_model_convert_model_MO_OVC_transition.html + ov_model = mo.convert_model(ONNX_CV_MODEL_PATH, layout="nchw->nhwc") + # alternatively use source_layout and target_layout parameters + ov_model = mo.convert_model(ONNX_CV_MODEL_PATH, source_layout="nchw", target_layout="nhwc") Specifying Mean and Scale Values ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -819,25 +791,18 @@ more examples. ov_model = prep.build() -.. code:: ipython3 - - # Legacy Model Optimizer API - from openvino.tools import mo - - - ov_model = mo.convert_model( - ONNX_CV_MODEL_PATH, - mean_values=[255 * x for x in [0.485, 0.456, 0.406]], - scale_values=[255 * x for x in [0.229, 0.224, 0.225]], - ) +.. code:: python + # Legacy Model Optimizer API -.. parsed-literal:: + from openvino.tools import mo - [ INFO ] MO command line tool is considered as the legacy conversion API as of OpenVINO 2023.2 release. - In 2025.0 MO command line tool and openvino.tools.mo.convert_model() will be removed. Please use OpenVINO Model Converter (OVC) or openvino.convert_model(). 
OVC represents a lightweight alternative of MO and provides simplified model conversion API. - Find more information about transition from MO to OVC at https://docs.openvino.ai/2023.2/openvino_docs_OV_Converter_UG_prepare_model_convert_model_MO_OVC_transition.html + ov_model = mo.convert_model( + ONNX_CV_MODEL_PATH, + mean_values=[255 * x for x in [0.485, 0.456, 0.406]], + scale_values=[255 * x for x in [0.229, 0.224, 0.225]], + ) Reversing Input Channels ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -862,20 +827,12 @@ the color channels before inference. prep.input("input.1").preprocess().reverse_channels() ov_model = prep.build() -.. code:: ipython3 - - # Legacy Model Optimizer API - from openvino.tools import mo - - ov_model = mo.convert_model(ONNX_CV_MODEL_PATH, reverse_input_channels=True) - - -.. parsed-literal:: +.. code:: python - [ INFO ] MO command line tool is considered as the legacy conversion API as of OpenVINO 2023.2 release. - In 2025.0 MO command line tool and openvino.tools.mo.convert_model() will be removed. Please use OpenVINO Model Converter (OVC) or openvino.convert_model(). OVC represents a lightweight alternative of MO and provides simplified model conversion API. - Find more information about transition from MO to OVC at https://docs.openvino.ai/2023.2/openvino_docs_OV_Converter_UG_prepare_model_convert_model_MO_OVC_transition.html + # Legacy Model Optimizer API + from openvino.tools import mo + ov_model = mo.convert_model(ONNX_CV_MODEL_PATH, reverse_input_channels=True) Cutting Off Parts of a Model ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/notebooks/convnext-classification-with-output.rst b/docs/notebooks/convnext-classification-with-output.rst index 99bc625a13c257..1204ea2c17f106 100644 --- a/docs/notebooks/convnext-classification-with-output.rst +++ b/docs/notebooks/convnext-classification-with-output.rst @@ -192,7 +192,7 @@ And print results Predicted Class: 281 Predicted Label: n02123045 tabby, tabby cat - Predicted Probability: 0.4793865978717804 + Predicted Probability: 0.5351971983909607 Convert the model to OpenVINO Intermediate representation format diff --git a/docs/notebooks/cross-lingual-books-alignment-with-output.rst b/docs/notebooks/cross-lingual-books-alignment-with-output.rst index 87fad52e92709a..b116f0e1f5cda1 100644 --- a/docs/notebooks/cross-lingual-books-alignment-with-output.rst +++ b/docs/notebooks/cross-lingual-books-alignment-with-output.rst @@ -69,14 +69,7 @@ Guide =3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" - - %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu requests pysbd transformers "torch>=2.1" "openvino>=2023.1.0" seaborn ipywidgets + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu requests pysbd transformers "torch>=2.1" "openvino>=2023.1.0" seaborn ipywidgets "matplotlib>=3.4" Get Books --------- diff --git a/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst b/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst index f60ccb2fadd9e6..6f003fb71d75fb 100644 --- a/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst +++ b/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst @@ -98,20 +98,12 @@ Guide =2023.3.0" "monai>=0.9.1" "torchmetrics>=0.11.0" "nncf>=2.8.0" "opencv-python" torch tqdm --extra-index-url https://download.pytorch.org/whl/cpu - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "openvino>=2023.3.0" "monai>=0.9.1" "torchmetrics>=0.11.0" "nncf>=2.8.0" 
"opencv-python" "matplotlib>=3.4" torch tqdm --extra-index-url https://download.pytorch.org/whl/cpu .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. Imports @@ -162,10 +154,10 @@ Imports .. parsed-literal:: - 2024-10-07 23:35:52.753512: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-07 23:35:52.788105: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-22 22:41:03.560708: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-22 22:41:03.596278: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-07 23:35:53.378916: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-22 22:41:04.191138: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -231,7 +223,7 @@ notebook `__. .. parsed-literal:: - /tmp/ipykernel_72009/1592321960.py:3: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /tmp/ipykernel_2583350/1592321960.py:3: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. 
Please open an issue on GitHub for any issues related to this experimental feature. state_dict = torch.load(state_dict_file, map_location=torch.device("cpu")) @@ -452,7 +444,7 @@ this notebook. .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if x_e.shape[-i - 1] != x_0.shape[-i - 1]: @@ -534,18 +526,18 @@ Convert quantized model to OpenVINO IR model and save it. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:340: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:340: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! return self._level_low.item() - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:348: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:348: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
return self._level_high.item() - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if x_e.shape[-i - 1] != x_0.shape[-i - 1]: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: Tensor-likes are not close! - Mismatched elements: 249913 / 262144 (95.3%) - Greatest absolute difference: 4.628173828125 at index (0, 0, 430, 337) (up to 1e-05 allowed) - Greatest relative difference: 31968.152067381572 at index (0, 0, 102, 269) (up to 1e-05 allowed) + Mismatched elements: 249255 / 262144 (95.1%) + Greatest absolute difference: 3.8265769481658936 at index (0, 0, 126, 353) (up to 1e-05 allowed) + Greatest relative difference: 18364.59337143498 at index (0, 0, 305, 451) (up to 1e-05 allowed) _check_trace( @@ -671,7 +663,7 @@ be run in the notebook with ``! benchmark_app`` or [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.LATENCY. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 8.78 ms + [ INFO ] Read model took 8.83 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [1,1,512,512] @@ -685,7 +677,7 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.final_conv/aten::_convolution/Add) : f32 / [...] / [1,1,512,512] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 236.15 ms + [ INFO ] Compile model took 250.74 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -722,17 +714,17 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference synchronously, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 48.84 ms + [ INFO ] First inference took 51.21 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 429 iterations - [ INFO ] Duration: 15015.79 ms + [ INFO ] Count: 392 iterations + [ INFO ] Duration: 15027.48 ms [ INFO ] Latency: - [ INFO ] Median: 34.71 ms - [ INFO ] Average: 34.77 ms - [ INFO ] Min: 34.38 ms - [ INFO ] Max: 37.16 ms - [ INFO ] Throughput: 28.57 FPS + [ INFO ] Median: 34.96 ms + [ INFO ] Average: 38.10 ms + [ INFO ] Min: 34.49 ms + [ INFO ] Max: 48.05 ms + [ INFO ] Throughput: 26.09 FPS .. code:: ipython3 @@ -758,7 +750,7 @@ be run in the notebook with ``! benchmark_app`` or [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.LATENCY. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 10.92 ms + [ INFO ] Read model took 11.01 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [1,1,512,512] @@ -772,7 +764,7 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.final_conv/aten::_convolution/Add) : f32 / [...] / [1,1,512,512] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 239.20 ms + [ INFO ] Compile model took 238.95 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model49 @@ -809,17 +801,17 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference synchronously, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 29.32 ms + [ INFO ] First inference took 30.03 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 883 iterations - [ INFO ] Duration: 15004.05 ms + [ INFO ] Count: 952 iterations + [ INFO ] Duration: 15015.31 ms [ INFO ] Latency: - [ INFO ] Median: 15.57 ms - [ INFO ] Average: 16.79 ms - [ INFO ] Min: 15.15 ms - [ INFO ] Max: 22.01 ms - [ INFO ] Throughput: 58.85 FPS + [ INFO ] Median: 15.55 ms + [ INFO ] Average: 15.57 ms + [ INFO ] Min: 15.23 ms + [ INFO ] Max: 17.26 ms + [ INFO ] Throughput: 63.40 FPS Visually Compare Inference Results @@ -913,7 +905,7 @@ seed is displayed to enable reproducing specific runs of this cell. .. parsed-literal:: - Visualizing results with seed 1728337035 + Visualizing results with seed 1729629747 @@ -996,8 +988,8 @@ performs inference, and displays the results on the frames loaded in .. parsed-literal:: - Loaded model to AUTO in 0.15 seconds. - Total time for 68 frames: 2.32 seconds, fps:29.73 + Loaded model to AUTO in 0.18 seconds. 
+ Total time for 68 frames: 2.32 seconds, fps:29.79 References @@ -1010,7 +1002,7 @@ Repository `__ - `Neural Network Compression Framework for fast model inference `__ - `OpenVINO API Tutorial `__ - `OpenVINO PyPI (pip -install openvino-dev) `__ +install openvino) `__ **Kits19 Data** - `Kits19 Challenge Homepage `__ - `Kits19 GitHub diff --git a/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png b/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png index 03b5eb1e3fd9f0..f7c163c1c77604 100644 --- a/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png +++ b/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1aac55db34be1df744fd19868762a8b4572a8e19683af72a57d7176b1486af0c -size 380239 +oid sha256:5d329fc21292f69aeee8164a3f805f2e8e61369c42eab565ebafe555cf6d1a1c +size 381131 diff --git a/docs/notebooks/ddcolor-image-colorization-with-output.rst b/docs/notebooks/ddcolor-image-colorization-with-output.rst index ccd6216d26268d..b2a76fd1a0ded8 100644 --- a/docs/notebooks/ddcolor-image-colorization-with-output.rst +++ b/docs/notebooks/ddcolor-image-colorization-with-output.rst @@ -25,9 +25,8 @@ In this tutorial we consider how to convert and run DDColor using OpenVINO. Additionally, we will demonstrate how to optimize this model using `NNCF `__. -🪄 Let’s start to explore magic of image colorization! - -**Table of contents:** +🪄 Let’s start to explore magic of image colorization! #### Table of +contents: - `Prerequisites <#prerequisites>`__ - `Load PyTorch model <#load-pytorch-model>`__ @@ -68,7 +67,7 @@ Prerequisites .. code:: ipython3 import platform - + %pip install -q "nncf>=2.11.0" "torch>=2.1" "torchvision" "timm" "opencv_python" "pillow" "PyYAML" "scipy" "scikit-image" "datasets" "gradio>=4.19" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -Uq "openvino>=2024.3.0" if platform.python_version_tuple()[1] in ["8", "9"]: @@ -89,14 +88,14 @@ Prerequisites import sys from pathlib import Path import requests - + repo_dir = Path("DDColor") - + if not repo_dir.exists(): !git clone https://github.com/piddnad/DDColor.git - + sys.path.append(str(repo_dir)) - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) @@ -110,7 +109,7 @@ Prerequisites remote: Counting objects: 100% (76/76), done. remote: Compressing objects: 100% (42/42), done. remote: Total 233 (delta 54), reused 34 (delta 34), pack-reused 157 (from 1) - Receiving objects: 100% (233/233), 13.34 MiB | 17.27 MiB/s, done. + Receiving objects: 100% (233/233), 13.34 MiB | 641.00 KiB/s, done. Resolving deltas: 100% (80/80), done. @@ -129,6 +128,13 @@ Prerequisites except Exception: from inference.colorization_pipeline_hf import DDColorHF, ImageColorizationPipelineHF + +.. 
parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers + warnings.warn(f"Importing from {__name__} is deprecated, please import via timm.layers", FutureWarning) + + Load PyTorch model ------------------ @@ -143,14 +149,14 @@ models from DDColor family. .. code:: ipython3 import torch - + model_name = "ddcolor_paper_tiny" - + ddcolor_model = DDColorHF.from_pretrained(f"piddnad/{model_name}") - - + + colorizer = ImageColorizationPipelineHF(model=ddcolor_model, input_size=512) - + ddcolor_model.to("cpu") colorizer.device = torch.device("cpu") @@ -163,12 +169,12 @@ Run PyTorch model inference import cv2 import PIL - + IMG_PATH = "DDColor/assets/test_images/Ansel Adams _ Moore Photography.jpeg" - - + + img = cv2.imread(IMG_PATH) - + PIL.Image.fromarray(img[:, :, ::-1]) @@ -207,9 +213,9 @@ loading on device using ``core.complie_model``. import openvino as ov import torch - + OV_COLORIZER_PATH = Path("ddcolor.xml") - + if not OV_COLORIZER_PATH.exists(): ov_model = ov.convert_model(ddcolor_model, example_input=torch.ones((1, 3, 512, 512)), input=[1, 3, 512, 512]) ov.save_model(ov_model, OV_COLORIZER_PATH) @@ -224,11 +230,11 @@ Select one of supported devices for inference using dropdown list. .. code:: ipython3 from notebook_utils import device_widget - + core = ov.Core() - + device = device_widget() - + device @@ -250,36 +256,36 @@ Select one of supported devices for inference using dropdown list. import numpy as np import torch import torch.nn.functional as F - - + + def process(img, compiled_model): # Preprocess input image height, width = img.shape[:2] - + # Normalize to [0, 1] range img = (img / 255.0).astype(np.float32) orig_l = cv2.cvtColor(img, cv2.COLOR_BGR2Lab)[:, :, :1] # (h, w, 1) - + # Resize rgb image -> lab -> get grey -> rgb img = cv2.resize(img, (512, 512)) img_l = cv2.cvtColor(img, cv2.COLOR_BGR2Lab)[:, :, :1] img_gray_lab = np.concatenate((img_l, np.zeros_like(img_l), np.zeros_like(img_l)), axis=-1) img_gray_rgb = cv2.cvtColor(img_gray_lab, cv2.COLOR_LAB2RGB) - + # Transpose HWC -> CHW and add batch dimension tensor_gray_rgb = torch.from_numpy(img_gray_rgb.transpose((2, 0, 1))).float().unsqueeze(0) - + # Run model inference output_ab = compiled_model(tensor_gray_rgb)[0] - + # Postprocess result # resize ab -> concat original l -> rgb output_ab_resize = F.interpolate(torch.from_numpy(output_ab), size=(height, width))[0].float().numpy().transpose(1, 2, 0) output_lab = np.concatenate((orig_l, output_ab_resize), axis=-1) output_bgr = cv2.cvtColor(output_lab, cv2.COLOR_LAB2BGR) - + output_img = (output_bgr * 255.0).round().astype(np.uint8) - + return output_img .. code:: ipython3 @@ -318,7 +324,7 @@ improve model inference speed. .. code:: ipython3 from notebook_utils import quantization_widget - + to_quantize = quantization_widget() to_quantize @@ -334,15 +340,15 @@ improve model inference speed. .. code:: ipython3 import requests - + OV_INT8_COLORIZER_PATH = Path("ddcolor_int8.xml") compiled_int8_model = None - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", ) open("skip_kernel_extension.py", "w").write(r.text) - + %load_ext skip_kernel_extension Collect quantization dataset @@ -357,12 +363,12 @@ dataset from Hugging Face as calibration data. .. 
code:: ipython3 %%skip not $to_quantize.value - + from datasets import load_dataset - + subset_size = 300 calibration_data = [] - + if not OV_INT8_COLORIZER_PATH.exists(): dataset = load_dataset("ummagumm-a/colorization_dataset", split="train", streaming=True).shuffle(seed=42).take(subset_size) for idx, batch in enumerate(dataset): @@ -374,7 +380,7 @@ dataset from Hugging Face as calibration data. img_l = cv2.cvtColor(np.stack([img, img, img], axis=2), cv2.COLOR_BGR2Lab)[:, :, :1] img_gray_lab = np.concatenate((img_l, np.zeros_like(img_l), np.zeros_like(img_l)), axis=-1) img_gray_rgb = cv2.cvtColor(img_gray_lab, cv2.COLOR_LAB2RGB) - + image = np.expand_dims(img_gray_rgb.transpose((2, 0, 1)).astype(np.float32), axis=0) calibration_data.append(image) @@ -386,9 +392,9 @@ Perform model quantization .. code:: ipython3 %%skip not $to_quantize.value - + import nncf - + if not OV_INT8_COLORIZER_PATH.exists(): ov_model = core.read_model(OV_COLORIZER_PATH) quantized_model = nncf.quantize( @@ -406,10 +412,10 @@ Perform model quantization .. parsed-literal:: - 2024-10-07 23:39:33.824396: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-07 23:39:33.863560: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-22 22:45:07.339219: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-22 22:45:07.378241: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-07 23:39:34.271973: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-22 22:45:07.784302: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -446,7 +452,7 @@ Run INT8 model inference .. code:: ipython3 from IPython.display import display - + if OV_INT8_COLORIZER_PATH.exists(): compiled_int8_model = core.compile_model(OV_INT8_COLORIZER_PATH, device.value) img = cv2.imread("DDColor/assets/test_images/Ansel Adams _ Moore Photography.jpeg") @@ -466,9 +472,9 @@ Compare FP16 and INT8 model size .. 
code:: ipython3 fp16_ir_model_size = OV_COLORIZER_PATH.with_suffix(".bin").stat().st_size / 2**20 - + print(f"FP16 model size: {fp16_ir_model_size:.2f} MB") - + if OV_INT8_COLORIZER_PATH.exists(): quantized_model_size = OV_INT8_COLORIZER_PATH.with_suffix(".bin").stat().st_size / 2**20 print(f"INT8 model size: {quantized_model_size:.2f} MB") @@ -507,17 +513,17 @@ Tool =2024.2.0" "datasets>=2.14.6" "nncf>=2.11.0" "tqdm" + %pip install -q "openvino>=2024.2.0" "datasets>=2.14.6" "nncf>=2.11.0" "tqdm" "matplotlib>=3.4" %pip install -q "typing-extensions>=4.9.0" eval-type-backport "gradio>=4.19" %pip install -q -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu @@ -240,7 +241,7 @@ is preprocessed image height, ``W`` is preprocessed image width. xFormers not available xFormers not available - /tmp/ipykernel_74875/1110356474.py:8: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /tmp/ipykernel_2586231/1110356474.py:8: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. model.load_state_dict(torch.load(model_path, map_location="cpu")) @@ -272,7 +273,7 @@ is preprocessed image height, ``W`` is preprocessed image width. .. parsed-literal:: - + @@ -306,13 +307,13 @@ loading on device using ``core.complie_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:73: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:73: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert H % patch_H == 0, f"Input image height {H} is not a multiple of patch height {patch_H}" - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:74: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:74: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert W % patch_W == 0, f"Input image width {W} is not a multiple of patch width: {patch_W}" - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2.py:183: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2.py:183: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if npatch == N and w == h: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dpt.py:147: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dpt.py:147: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
out = F.interpolate(out, (int(patch_h * 14), int(patch_w * 14)), mode="bilinear", align_corners=True) @@ -404,7 +405,7 @@ range. .. parsed-literal:: - + @@ -626,7 +627,7 @@ Run inference on video .. parsed-literal:: - Processed 60 frames in 14.01 seconds. Total FPS (including video processing): 4.28.Inference FPS: 9.46 + Processed 60 frames in 13.44 seconds. Total FPS (including video processing): 4.46.Inference FPS: 10.42 Video saved to 'output/Coco Walking in Berkeley_depth_anything.mp4'. @@ -653,7 +654,7 @@ Run inference on video .. parsed-literal:: Showing video saved at - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/output/Coco Walking in Berkeley_depth_anything.mp4 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/output/Coco Walking in Berkeley_depth_anything.mp4 If you cannot see the video in your browser, please click on the following link to download the video @@ -786,10 +787,10 @@ quantization code below may take some time. .. parsed-literal:: - 2024-10-07 23:47:57.736195: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-07 23:47:57.768920: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-22 22:53:01.689628: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-22 22:53:01.723459: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-07 23:47:58.341833: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-22 22:53:02.312695: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -915,10 +916,10 @@ data. .. parsed-literal:: - Processed 60 frames in 12.89 seconds. Total FPS (including video processing): 4.65.Inference FPS: 12.78 + Processed 60 frames in 12.93 seconds. Total FPS (including video processing): 4.64.Inference FPS: 12.78 Video saved to 'output/Coco Walking in Berkeley_depth_anything_int8.mp4'. 
Showing video saved at - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/output/Coco Walking in Berkeley_depth_anything.mp4 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/output/Coco Walking in Berkeley_depth_anything.mp4 If you cannot see the video in your browser, please click on the following link to download the video @@ -998,9 +999,9 @@ Tool =2023.3.0" "datasets>=2.14.6" "nncf" "tqdm" - %pip install -q "typing-extensions>=4.9.0" eval-type-backport "gradio>=4.19" + %pip install -q "typing-extensions>=4.9.0" eval-type-backport "gradio>=4.19" "matplotlib>=3.4" %pip install -q -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu if platform.python_version_tuple()[1] in ["8", "9"]: @@ -90,9 +90,9 @@ Prerequisites remote: Counting objects: 100% (161/161), done. remote: Compressing objects: 100% (120/120), done. remote: Total 441 (delta 115), reused 44 (delta 41), pack-reused 280 (from 1) - Receiving objects: 100% (441/441), 237.90 MiB | 23.22 MiB/s, done. + Receiving objects: 100% (441/441), 237.90 MiB | 25.06 MiB/s, done. Resolving deltas: 100% (158/158), done. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. WARNING: typer 0.12.5 does not provide the extra 'all' @@ -284,13 +284,13 @@ loading on device using ``core.complie_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/patch_embed.py:73: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/patch_embed.py:73: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert H % patch_H == 0, f"Input image height {H} is not a multiple of patch height {patch_H}" - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/patch_embed.py:74: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/patch_embed.py:74: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert W % patch_W == 0, f"Input image width {W} is not a multiple of patch width: {patch_W}" - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/vision_transformer.py:183: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/vision_transformer.py:183: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if npatch == N and w == h: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/depth_anything/dpt.py:133: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/depth_anything/dpt.py:133: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! out = F.interpolate(out, (int(patch_h * 14), int(patch_w * 14)), mode="bilinear", align_corners=True) @@ -573,7 +573,7 @@ Run inference on video .. parsed-literal:: - Processed 60 frames in 13.58 seconds. Total FPS (including video processing): 4.42.Inference FPS: 10.20 + Processed 60 frames in 13.51 seconds. Total FPS (including video processing): 4.44.Inference FPS: 10.15 Video saved to 'output/Coco Walking in Berkeley_depth_anything.mp4'. @@ -600,7 +600,7 @@ Run inference on video .. 
parsed-literal:: Showing video saved at - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/output/Coco Walking in Berkeley_depth_anything.mp4 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/output/Coco Walking in Berkeley_depth_anything.mp4 If you cannot see the video in your browser, please click on the following link to download the video @@ -733,10 +733,10 @@ quantization code below may take some time. .. parsed-literal:: - 2024-10-07 23:57:01.421550: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-07 23:57:01.453134: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-22 23:02:01.251241: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-22 23:02:01.284565: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-07 23:57:02.034824: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-22 23:02:01.862503: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -862,10 +862,10 @@ data. .. parsed-literal:: - Processed 60 frames in 13.06 seconds. Total FPS (including video processing): 4.59.Inference FPS: 12.23 + Processed 60 frames in 12.84 seconds. Total FPS (including video processing): 4.67.Inference FPS: 12.71 Video saved to 'output/Coco Walking in Berkeley_depth_anything_int8.mp4'. 
Showing video saved at - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/output/Coco Walking in Berkeley_depth_anything.mp4 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/output/Coco Walking in Berkeley_depth_anything.mp4 If you cannot see the video in your browser, please click on the following link to download the video @@ -945,9 +945,9 @@ Tool =2023.1.0" - + import os import requests + from pathlib import Path + import platform + + + os.environ["GIT_CLONE_PROTECTION_ACTIVE"] = "false" + + if not Path("notebook_utils.py").exists(): + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + with open("notebook_utils.py", "w") as f: + f.write(r.text) - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - open("notebook_utils.py", "w").write(r.text) + if not Path("pip_helper.py").exists(): + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/pip_helper.py", + ) + open("pip_helper.py", "w").write(r.text) + + from pip_helper import pip_install + + if platform.system() == "Darwin": + pip_install("numpy<2.0.0") + pip_install("torch", "torchvision", "opencv-python", "wheel", "--extra-index-url", "https://download.pytorch.org/whl/cpu") + pip_install("git+https://github.com/facebookresearch/detectron2.git", "--extra-index-url", "https://download.pytorch.org/whl/cpu") + pip_install("openvino>=2023.1.0") .. parsed-literal:: - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. 
+ Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cpu + Requirement already satisfied: torch in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2.4.1+cpu) + Requirement already satisfied: torchvision in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (0.19.1+cpu) + Requirement already satisfied: opencv-python in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.10.0.84) + Requirement already satisfied: wheel in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (0.44.0) + Requirement already satisfied: filelock in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.16.1) + Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (4.12.2) + Requirement already satisfied: sympy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (1.13.3) + Requirement already satisfied: networkx in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1) + Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1.4) + Requirement already satisfied: fsspec in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (2024.9.0) + Requirement already satisfied: numpy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torchvision) (1.23.5) + Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torchvision) (10.4.0) + Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch) (2.1.5) + Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from sympy->torch) (1.3.0) + Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cpu + Collecting git+https://github.com/facebookresearch/detectron2.git + Cloning https://github.com/facebookresearch/detectron2.git to /tmp/pip-req-build-we1e_5gi +.. 
parsed-literal:: + Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/detectron2.git /tmp/pip-req-build-we1e_5gi -.. parsed-literal:: - 24692 +.. parsed-literal:: + Resolved https://github.com/facebookresearch/detectron2.git to commit 8d85329aed8506ea3672e3e208971345973ea761 + Preparing metadata (setup.py): started + Preparing metadata (setup.py): finished with status 'done' + Requirement already satisfied: Pillow>=7.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (10.4.0) + Requirement already satisfied: black in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (24.3.0) + Collecting cloudpickle (from detectron2==0.6) + Using cached cloudpickle-3.1.0-py3-none-any.whl.metadata (7.0 kB) + Collecting fvcore<0.1.6,>=0.1.5 (from detectron2==0.6) + Using cached fvcore-0.1.5.post20221221-py3-none-any.whl + Collecting hydra-core>=1.1 (from detectron2==0.6) + Using cached hydra_core-1.3.2-py3-none-any.whl.metadata (5.5 kB) + Collecting iopath<0.1.10,>=0.1.7 (from detectron2==0.6) + Using cached iopath-0.1.9-py3-none-any.whl.metadata (370 bytes) + Requirement already satisfied: matplotlib in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (3.7.5) + Requirement already satisfied: omegaconf<2.4,>=2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (2.3.0) + Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (24.1) + Collecting pycocotools>=2.0.2 (from detectron2==0.6) + Using cached pycocotools-2.0.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.1 kB) + Requirement already satisfied: tabulate in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (0.9.0) + Requirement already satisfied: tensorboard in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (2.12.3) + Requirement already satisfied: termcolor>=1.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (2.4.0) + Requirement already satisfied: tqdm>4.29.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (4.66.5) + Collecting yacs>=0.1.8 (from detectron2==0.6) + Using cached yacs-0.1.8-py3-none-any.whl.metadata (639 bytes) + Requirement already satisfied: numpy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from fvcore<0.1.6,>=0.1.5->detectron2==0.6) (1.23.5) + Requirement already satisfied: pyyaml>=5.1 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from fvcore<0.1.6,>=0.1.5->detectron2==0.6) (6.0.2) + Requirement already satisfied: antlr4-python3-runtime==4.9.* in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.1->detectron2==0.6) (4.9.3) + Requirement already satisfied: importlib-resources in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.1->detectron2==0.6) (6.4.5) + Collecting portalocker (from iopath<0.1.10,>=0.1.7->detectron2==0.6) + Using cached portalocker-2.10.1-py3-none-any.whl.metadata (8.5 kB) + Requirement already satisfied: contourpy>=1.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (1.1.1) + Requirement already satisfied: cycler>=0.10 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (0.12.1) + Requirement already satisfied: fonttools>=4.22.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (4.54.1) + Requirement already satisfied: kiwisolver>=1.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (1.4.7) + Requirement already satisfied: pyparsing>=2.3.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (3.1.4) + Requirement already satisfied: python-dateutil>=2.7 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (2.9.0.post0) + Requirement already satisfied: click>=8.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (8.1.7) + Requirement already satisfied: mypy-extensions>=0.4.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (1.0.0) + Requirement already satisfied: pathspec>=0.9.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (0.12.1) + Requirement already satisfied: platformdirs>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (4.3.6) + Requirement already satisfied: tomli>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) 
(2.0.2) + Requirement already satisfied: typing-extensions>=4.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (4.12.2) + Requirement already satisfied: absl-py>=0.4 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (1.4.0) + Requirement already satisfied: grpcio>=1.48.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (1.67.0) + Requirement already satisfied: google-auth<3,>=1.6.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (2.35.0) + Requirement already satisfied: google-auth-oauthlib<1.1,>=0.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (1.0.0) + Requirement already satisfied: markdown>=2.6.8 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (3.7) + Requirement already satisfied: protobuf>=3.19.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (3.20.3) + Requirement already satisfied: requests<3,>=2.21.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (2.32.3) + Requirement already satisfied: setuptools>=41.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (44.0.0) + Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (0.7.2) + Requirement already satisfied: werkzeug>=1.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (3.0.4) + Requirement already satisfied: wheel>=0.26 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (0.44.0) + Requirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (5.5.0) + Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from 
google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (0.4.1) + Requirement already satisfied: rsa<5,>=3.1.4 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (4.9) + Requirement already satisfied: requests-oauthlib>=0.7.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth-oauthlib<1.1,>=0.5->tensorboard->detectron2==0.6) (2.0.0) + Requirement already satisfied: zipp>=3.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from importlib-resources->hydra-core>=1.1->detectron2==0.6) (3.20.2) + Requirement already satisfied: importlib-metadata>=4.4 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from markdown>=2.6.8->tensorboard->detectron2==0.6) (8.5.0) + Requirement already satisfied: six>=1.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from python-dateutil>=2.7->matplotlib->detectron2==0.6) (1.16.0) + Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (3.4.0) + Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (3.10) + Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (2.2.3) + Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (2024.8.30) + Requirement already satisfied: MarkupSafe>=2.1.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from werkzeug>=1.0.1->tensorboard->detectron2==0.6) (2.1.5) + Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (0.6.1) + Requirement already satisfied: oauthlib>=3.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard->detectron2==0.6) (3.2.2) + Using cached hydra_core-1.3.2-py3-none-any.whl (154 kB) + Using cached iopath-0.1.9-py3-none-any.whl (27 kB) + Using cached 
pycocotools-2.0.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (439 kB) + Using cached yacs-0.1.8-py3-none-any.whl (14 kB) + Using cached cloudpickle-3.1.0-py3-none-any.whl (22 kB) + Using cached portalocker-2.10.1-py3-none-any.whl (18 kB) + Building wheels for collected packages: detectron2 + Building wheel for detectron2 (setup.py): started + Building wheel for detectron2 (setup.py): finished with status 'done' + Created wheel for detectron2: filename=detectron2-0.6-cp38-cp38-linux_x86_64.whl size=8313552 sha256=23ceb6e5b734ecc530172b613be139d732deaa2e962d5a8bc940e6b23a85309d + Stored in directory: /tmp/pip-ephem-wheel-cache-65iaghs7/wheels/19/ac/65/e48e5e4ec2702274d927c5a6efb75709b24014371d3bb778f2 + Successfully built detectron2 + Installing collected packages: yacs, portalocker, cloudpickle, iopath, hydra-core, pycocotools, fvcore, detectron2 + Successfully installed cloudpickle-3.1.0 detectron2-0.6 fvcore-0.1.5.post20221221 hydra-core-1.3.2 iopath-0.1.9 portalocker-2.10.1 pycocotools-2.0.7 yacs-0.1.8 + Requirement already satisfied: openvino>=2023.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2024.4.0) + Requirement already satisfied: numpy<2.1.0,>=1.16.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (1.23.5) + Requirement already satisfied: openvino-telemetry>=2023.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (2024.1.0) + Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (24.1) Define helpers for PyTorch model initialization and conversion diff --git a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.jpg b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.jpg index ffb8e4a0030770..6160562199f757 100644 --- a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.jpg +++ b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2341ffe8acbda0ad14e43fca01d72733855b5bde3b29601f9bbeaa4d6ff41207 -size 58357 +oid sha256:b6475f9155bca7152327255fb36bac32dea6f10aa834b6a91d46d4ecc718be0f +size 58279 diff --git a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.png b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.png index 561c47897650fb..77ac4e5e0c1fd8 100644 --- a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.png +++ b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ce0e6f8e05d4a4e52b304aa95e729a9fac0def06f80f61feacf0405f95dbb31f -size 509296 +oid sha256:b98f4e2e56bb62e6d7c68e536d05695f16b412d4bb8a502f5ced3c466716fe91 +size 508620 diff --git 
a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.jpg b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.jpg index e8c8278cf4c90d..98087b7c1f10f2 100644 --- a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.jpg +++ b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:47fc91d79709effd086bc71f8586af4bc47ce40a460e1d886a10fb3abf0ce2d8 -size 56091 +oid sha256:02cb8226997c872abaee3c70c1ab90c8e6ac078adb81ec6d0721ece6049e7af3 +size 54825 diff --git a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.png b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.png index cf32ec81286190..e317a3a3005ef6 100644 --- a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.png +++ b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:afd0387776f050660082e8adde8af8958eaf586f20e55da75b392f53362487ca -size 459024 +oid sha256:940552cfe8d62878a7b2fb827529df2ef6ef4fa135f82f578362142650e0d751 +size 457741 diff --git a/docs/notebooks/distil-whisper-asr-with-output.rst b/docs/notebooks/distil-whisper-asr-with-output.rst index fdd81327b5675a..2cdecfe17a19d4 100644 --- a/docs/notebooks/distil-whisper-asr-with-output.rst +++ b/docs/notebooks/distil-whisper-asr-with-output.rst @@ -85,9 +85,9 @@ Prerequisites .. code:: ipython3 - %pip install -q "transformers>=4.35" "torch>=2.1,<2.4.0" "torchvision<0.19.0" "onnx<1.16.2" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "transformers>=4.35" "torch>=2.4.1" "onnx!=1.16.2" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q "git+https://github.com/huggingface/optimum-intel.git" - %pip install -q "openvino>=2023.2.0" datasets "gradio>=4.0" "librosa" "soundfile" + %pip install -q "openvino>=2023.2.0" datasets "gradio>=4.19" "librosa" "soundfile" %pip install -q "nncf>=2.6.0" "jiwer" import requests diff --git a/docs/notebooks/distilbert-sequence-classification-with-output.rst b/docs/notebooks/distilbert-sequence-classification-with-output.rst index 463a8051cf4d8b..5f069551357b6d 100644 --- a/docs/notebooks/distilbert-sequence-classification-with-output.rst +++ b/docs/notebooks/distilbert-sequence-classification-with-output.rst @@ -47,31 +47,31 @@ Imports .. 
parsed-literal:: Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cpu - Requirement already satisfied: openvino>=2023.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2024.4.0) - Requirement already satisfied: transformers in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.45.2) - Requirement already satisfied: torch>=2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2.4.1+cpu) - Requirement already satisfied: tqdm in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.66.5) - Requirement already satisfied: numpy<2.1.0,>=1.16.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (1.23.5) - Requirement already satisfied: openvino-telemetry>=2023.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (2024.1.0) - Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (24.1) - Requirement already satisfied: filelock in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (3.16.1) - Requirement already satisfied: huggingface-hub<1.0,>=0.23.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.25.1) - Requirement already satisfied: pyyaml>=5.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (6.0.2) - Requirement already satisfied: regex!=2019.12.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (2024.9.11) - Requirement already satisfied: requests in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (2.32.3) - Requirement already satisfied: safetensors>=0.4.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.4.5) - Requirement already satisfied: tokenizers<0.21,>=0.20 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.20.0) - Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (4.12.2) - Requirement already 
satisfied: sympy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (1.13.3) - Requirement already satisfied: networkx in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (3.1) - Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (3.1.4) - Requirement already satisfied: fsspec in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (2024.6.1) - Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch>=2.1) (2.1.5) - Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (3.3.2) - Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (3.10) - Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (2.2.3) - Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (2024.8.30) - Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from sympy->torch>=2.1) (1.3.0) + Requirement already satisfied: openvino>=2023.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2024.4.0) + Requirement already satisfied: transformers in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.45.2) + Requirement already satisfied: torch>=2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2.4.1+cpu) + Requirement already satisfied: tqdm in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.66.5) + Requirement already satisfied: numpy<2.1.0,>=1.16.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (1.23.5) + Requirement already satisfied: openvino-telemetry>=2023.2.1 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (2024.1.0) + Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (24.1) + Requirement already satisfied: filelock in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (3.16.1) + Requirement already satisfied: huggingface-hub<1.0,>=0.23.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.26.1) + Requirement already satisfied: pyyaml>=5.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (6.0.2) + Requirement already satisfied: regex!=2019.12.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (2024.9.11) + Requirement already satisfied: requests in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (2.32.3) + Requirement already satisfied: safetensors>=0.4.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.4.5) + Requirement already satisfied: tokenizers<0.21,>=0.20 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.20.1) + Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (4.12.2) + Requirement already satisfied: sympy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (1.13.3) + Requirement already satisfied: networkx in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (3.1) + Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (3.1.4) + Requirement already satisfied: fsspec in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (2024.9.0) + Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch>=2.1) (2.1.5) + Requirement already satisfied: charset-normalizer<4,>=2 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (3.4.0) + Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (3.10) + Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (2.2.3) + Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (2024.8.30) + Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from sympy->torch>=2.1) (1.3.0) Note: you may need to restart the kernel to use updated packages. @@ -166,9 +166,9 @@ optimal execution on end-point target devices. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4779: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4779: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/distilbert/modeling_distilbert.py:215: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/distilbert/modeling_distilbert.py:215: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. mask, torch.tensor(torch.finfo(scores.dtype).min) @@ -274,7 +274,7 @@ For a single input sentence .. 
parsed-literal:: Label: POSITIVE - Total Time: 0.02 seconds + Total Time: 0.03 seconds Read from a text file diff --git a/docs/notebooks/dolly-2-instruction-following-with-output.rst b/docs/notebooks/dolly-2-instruction-following-with-output.rst index 01d4b8fed8bb57..9f6857b608d962 100644 --- a/docs/notebooks/dolly-2-instruction-following-with-output.rst +++ b/docs/notebooks/dolly-2-instruction-following-with-output.rst @@ -214,10 +214,10 @@ you can add ``--sym``. For INT4 quantization you can also specify the following arguments : - The ``--group-size`` parameter will define the group size to use for -quantization, -1 it will results in per-column quantization. + quantization, -1 it will results in per-column quantization. - The ``--ratio`` parameter controls the ratio between 4-bit and 8-bit -quantization. If set to 0.9, it means that 90% of the layers will be -quantized to int4 while 10% will be quantized to int8. + quantization. If set to 0.9, it means that 90% of the layers will be + quantized to int4 while 10% will be quantized to int8. Smaller group_size and ratio values usually improve accuracy at the sacrifice of the model size and inference latency. diff --git a/docs/notebooks/dynamicrafter-animating-images-with-output.rst b/docs/notebooks/dynamicrafter-animating-images-with-output.rst index 194282459f56cb..584c3442c94af2 100644 --- a/docs/notebooks/dynamicrafter-animating-images-with-output.rst +++ b/docs/notebooks/dynamicrafter-animating-images-with-output.rst @@ -189,10 +189,10 @@ Prerequisites remote: Counting objects: 100% (153/153), done. remote: Compressing objects: 100% (99/99), done. remote: Total 335 (delta 97), reused 54 (delta 54), pack-reused 182 (from 1) - Receiving objects: 100% (335/335), 72.41 MiB | 19.06 MiB/s, done. + Receiving objects: 100% (335/335), 72.41 MiB | 22.40 MiB/s, done. Resolving deltas: 100% (123/123), done. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images @@ -282,7 +282,7 @@ We will use model for 256x256 resolution as example. Also, models for .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/huggingface_hub/file_download.py:1204: UserWarning: `local_dir_use_symlinks` parameter is deprecated and will be ignored. The process to download files to a local folder has been updated and do not rely on symlinks anymore. You only need to pass a destination folder as`local_dir`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/huggingface_hub/file_download.py:834: UserWarning: `local_dir_use_symlinks` parameter is deprecated and will be ignored. The process to download files to a local folder has been updated and do not rely on symlinks anymore. You only need to pass a destination folder as`local_dir`. 
For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder. warnings.warn( @@ -296,6 +296,16 @@ We will use model for 256x256 resolution as example. Also, models for .. parsed-literal:: AE working on z of shape (1, 4, 32, 32) = 4096 dimensions. + + +.. parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers + warnings.warn(f"Importing from {__name__} is deprecated, please import via timm.layers", FutureWarning) + + +.. parsed-literal:: + >>> model checkpoint loaded. @@ -399,43 +409,43 @@ resolutions. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/utils/image.py:226: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/utils/image.py:226: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if input.numel() == 0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:573: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:573: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if size == input_size: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:579: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:579: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! antialias = antialias and (max(factors) > 1) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:581: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:581: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if antialias: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:584: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:584: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! sigmas = (max((factors[0] - 1.0) / 2.0, 0.001), max((factors[1] - 1.0) / 2.0, 0.001)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:589: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:589: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! ks = int(max(2.0 * 2 * sigmas[0], 3)), int(max(2.0 * 2 * sigmas[1], 3)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:589: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:589: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! ks = int(max(2.0 * 2 * sigmas[0], 3)), int(max(2.0 * 2 * sigmas[1], 3)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/gaussian.py:55: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/gaussian.py:55: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. sigma = tensor([sigma], device=input.device, dtype=input.dtype) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/gaussian.py:55: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/gaussian.py:55: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! sigma = tensor([sigma], device=input.device, dtype=input.dtype) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/core/check.py:78: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/core/check.py:78: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if x_shape_to_check[i] != dim: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/kernels.py:92: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/kernels.py:92: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. mean = tensor([[mean]], device=sigma.device, dtype=sigma.dtype) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:101: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:101: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if len(mean.shape) == 0 or mean.shape[0] == 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if len(std.shape) == 0 or std.shape[0] == 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:107: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:107: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if mean.shape and mean.shape[0] != 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:108: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:108: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if mean.shape[0] != data.shape[1] and mean.shape[:2] != data.shape[:2]: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:112: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:112: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if std.shape and std.shape[0] != 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:113: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:113: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if std.shape[0] != data.shape[1] and std.shape[:2] != data.shape[:2]: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:116: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:116: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. mean = torch.as_tensor(mean, device=data.device, dtype=data.dtype) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:117: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:117: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. std = torch.as_tensor(std, device=data.device, dtype=data.dtype) @@ -464,7 +474,7 @@ Convert AE encoder .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/ae_modules.py:67: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/ae_modules.py:67: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! w_ = w_ * (int(c)**(-0.5)) @@ -508,15 +518,15 @@ Convert Diffusion U-Net model .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:556: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:556: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if l_context == 77 + t*16: ## !!! HARD CODE here - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:205: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:205: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if batch_size: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:232: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:232: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if self.use_temporal_conv and batch_size: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:76: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:76: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert x.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:99: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:99: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert x.shape[1] == self.channels @@ -903,14 +913,14 @@ Run OpenVINO pipeline inference .. parsed-literal:: Seed set to 234 - /tmp/ipykernel_79693/2451984876.py:25: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:206.) + /tmp/ipykernel_2590985/2451984876.py:25: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:206.) img_tensor = torch.from_numpy(image).permute(2, 0, 1).float().to(model.device) .. parsed-literal:: - start: man fishing in a boat at sunset 2024-10-08 00:11:25 - Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 194.23 seconds + start: man fishing in a boat at sunset 2024-10-22 23:16:18 + Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 196.25 seconds .. code:: ipython3 @@ -1162,10 +1172,10 @@ quantization time. .. parsed-literal:: - 2024-10-08 00:40:44.424263: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 00:40:44.462873: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-22 23:45:51.693284: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. 
You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-22 23:45:51.735392: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 00:40:45.077046: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-22 23:45:52.354791: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -1347,8 +1357,8 @@ Let’s run the optimized pipeline .. parsed-literal:: - start: man fishing in a boat at sunset 2024-10-08 01:40:46 - Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 98.42 seconds + start: man fishing in a boat at sunset 2024-10-23 00:47:13 + Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 98.40 seconds .. code:: ipython3 @@ -1453,9 +1463,9 @@ models, we use median inference time on calibration subset. .. parsed-literal:: - FP32 latency: 193.245 - INT8 latency: 97.168 - Performance speed up: 1.989 + FP32 latency: 195.358 + INT8 latency: 97.265 + Performance speed up: 2.009 Interactive inference diff --git a/docs/notebooks/efficient-sam-with-output.rst b/docs/notebooks/efficient-sam-with-output.rst index e9c5d2f07afec7..8d725c4594afc6 100644 --- a/docs/notebooks/efficient-sam-with-output.rst +++ b/docs/notebooks/efficient-sam-with-output.rst @@ -82,20 +82,12 @@ Prerequisites .. code:: ipython3 - import platform - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" - - %pip install -q "openvino>=2023.3.0" "nncf>=2.7.0" opencv-python "gradio>=4.13" torch torchvision tqdm --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "openvino>=2023.3.0" "nncf>=2.7.0" opencv-python "gradio>=4.13" "matplotlib>=3.4" torch torchvision tqdm --extra-index-url https://download.pytorch.org/whl/cpu .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. .. code:: ipython3 @@ -116,9 +108,9 @@ Prerequisites remote: Counting objects: 100% (85/85), done. remote: Compressing objects: 100% (33/33), done. remote: Total 424 (delta 76), reused 52 (delta 52), pack-reused 339 (from 1) - Receiving objects: 100% (424/424), 262.14 MiB | 22.33 MiB/s, done. + Receiving objects: 100% (424/424), 262.14 MiB | 24.94 MiB/s, done. Resolving deltas: 100% (246/246), done. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM .. code:: ipython3 @@ -385,23 +377,23 @@ disk using ``openvino.save_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:220: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:220: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if ( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:241: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:241: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert ( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:163: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:163: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! size = int(math.sqrt(xy_num)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
assert size * size == xy_num - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:166: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:166: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if size != h or size != w: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:251: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:251: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert x.shape[2] == num_patches - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if num_pts > self.decoder_max_num_input_points: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:92: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:92: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! elif num_pts < self.decoder_max_num_input_points: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:126: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:126: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if output_w > 0 and output_h > 0: @@ -648,10 +640,10 @@ architecture type, we should specify ``transformer`` in ``model_type``. .. parsed-literal:: - 2024-10-08 01:57:55.723142: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 01:57:55.754489: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-23 01:04:24.934254: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-23 01:04:24.966235: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 01:57:56.401127: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-23 01:04:25.612813: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -818,7 +810,7 @@ models, we use ``bencmark_app``. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 29.82 ms + [ INFO ] Read model took 30.59 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] batched_images (node: batched_images) : f32 / [...] / [?,?,?,?] @@ -838,7 +830,7 @@ models, we use ``bencmark_app``. [ INFO ] ***NO_NAME*** (node: aten::reshape/Reshape_3) : f32 / [...] / [?,?,?,?,?] 
[ INFO ] ***NO_NAME*** (node: aten::reshape/Reshape_2) : f32 / [...] / [?,?,?] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 1394.30 ms + [ INFO ] Compile model took 1397.68 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -879,17 +871,17 @@ models, we use ``bencmark_app``. [ INFO ] Fill input 'batched_point_labels' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in full mode (inputs filling are included in measurement loop). - [ INFO ] First inference took 815.67 ms + [ INFO ] First inference took 822.58 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 54 iterations - [ INFO ] Duration: 16885.27 ms + [ INFO ] Count: 51 iterations + [ INFO ] Duration: 16272.65 ms [ INFO ] Latency: - [ INFO ] Median: 1856.65 ms - [ INFO ] Average: 1850.85 ms - [ INFO ] Min: 1459.90 ms - [ INFO ] Max: 2009.04 ms - [ INFO ] Throughput: 3.20 FPS + [ INFO ] Median: 1835.79 ms + [ INFO ] Average: 1847.61 ms + [ INFO ] Min: 1271.60 ms + [ INFO ] Max: 2300.40 ms + [ INFO ] Throughput: 3.13 FPS .. code:: ipython3 @@ -915,7 +907,7 @@ models, we use ``bencmark_app``. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 44.15 ms + [ INFO ] Read model took 43.92 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] batched_images (node: batched_images) : f32 / [...] / [?,?,?,?] @@ -935,7 +927,7 @@ models, we use ``bencmark_app``. [ INFO ] ***NO_NAME*** (node: aten::reshape/Reshape_3) : f32 / [...] / [?,?,?,?,?] [ INFO ] ***NO_NAME*** (node: aten::reshape/Reshape_2) : f32 / [...] / [?,?,?] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 1618.00 ms + [ INFO ] Compile model took 1623.93 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -976,17 +968,17 @@ models, we use ``bencmark_app``. [ INFO ] Fill input 'batched_point_labels' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in full mode (inputs filling are included in measurement loop). - [ INFO ] First inference took 587.18 ms + [ INFO ] First inference took 578.24 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 58 iterations - [ INFO ] Duration: 16436.53 ms + [ INFO ] Count: 55 iterations + [ INFO ] Duration: 15797.98 ms [ INFO ] Latency: - [ INFO ] Median: 1670.45 ms - [ INFO ] Average: 1680.37 ms - [ INFO ] Min: 1321.28 ms - [ INFO ] Max: 2532.97 ms - [ INFO ] Throughput: 3.53 FPS + [ INFO ] Median: 1695.87 ms + [ INFO ] Average: 1683.49 ms + [ INFO ] Min: 550.40 ms + [ INFO ] Max: 1833.79 ms + [ INFO ] Throughput: 3.48 FPS Interactive segmentation demo @@ -1316,7 +1308,7 @@ Interactive segmentation demo .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam Running on local URL: http://127.0.0.1:7860 To create a public link, set `share=True` in `launch()`. 
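The two ``benchmark_app`` reports shown above for the EfficientSAM model variants can be condensed into a couple of summary figures: throughput is the iteration count divided by the total duration, and the ratio of the median latencies gives a speed-up estimate in the same spirit as the "Performance speed up" value printed earlier in this document. The sketch below is illustrative only; the numbers are copied from the logs above and will differ from run to run, and the ``throughput_fps`` helper is a name introduced here purely for illustration.

.. code:: ipython3

    # Values copied from the two benchmark_app reports above (they vary between runs).
    first_run = {"count": 51, "duration_ms": 16272.65, "median_latency_ms": 1835.79}
    second_run = {"count": 55, "duration_ms": 15797.98, "median_latency_ms": 1695.87}


    def throughput_fps(report: dict) -> float:
        # Illustrative helper: the throughput benchmark_app reports corresponds to
        # completed iterations per second of wall-clock duration.
        return report["count"] / (report["duration_ms"] / 1000.0)


    print(f"First run throughput:  {throughput_fps(first_run):.2f} FPS")   # ~3.13 FPS, matches the report above
    print(f"Second run throughput: {throughput_fps(second_run):.2f} FPS")  # ~3.48 FPS, matches the report above
    print(f"Median latency ratio:  {first_run['median_latency_ms'] / second_run['median_latency_ms']:.2f}x")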
diff --git a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_17_1.png b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_17_1.png index 8854bf68943a42..c6c2e7d72e694a 100644 --- a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_17_1.png +++ b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_17_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a0ed908091274e314601740b2be6f08a06b74532f09d98e99703a91f1155ccd4 -size 1260810 +oid sha256:ee7d364aeabe9a2787785e4d0ee5bb8951f530f90a0a625c9f31047bdc157b59 +size 1261144 diff --git a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_25_1.png b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_25_1.png index 45da467e43595b..ff03bff6360a90 100644 --- a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_25_1.png +++ b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_25_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:444eaadeac9bde960bc08fc6eed2ca8c5d5de854782d9ea322535ec1e35a38b0 -size 1261402 +oid sha256:40a049edc89f5339af256b80e0fc99740d9e3b6d3159e6e9d09e78683d24e0fe +size 1261722 diff --git a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_36_1.png b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_36_1.png index 91cbceb5a9bc44..766db096877cba 100644 --- a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_36_1.png +++ b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_36_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:90b937884a9df8f888beb020e5a56dba5ee941f780ac61dd0fda8502909038d2 -size 1261809 +oid sha256:f4fcf4eecf855fa1a9ca72c0dbf5c070e11a4a520873971202b592bc65dd2ccc +size 1261487 diff --git a/docs/notebooks/encodec-audio-compression-with-output.rst b/docs/notebooks/encodec-audio-compression-with-output.rst index 5036c6f32a2259..394c6e5b92e9bf 100644 --- a/docs/notebooks/encodec-audio-compression-with-output.rst +++ b/docs/notebooks/encodec-audio-compression-with-output.rst @@ -142,7 +142,7 @@ bandwidth. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. WeightNorm.apply(module, name, dim) @@ -302,7 +302,7 @@ similar as possible to the original. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. WeightNorm.apply(module, name, dim) @@ -402,13 +402,13 @@ with ``ov.save_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:60: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:60: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! ideal_length = (math.ceil(n_frames) - 1) * stride + (kernel_size - padding_total) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert padding_left >= 0 and padding_right >= 0, (padding_left, padding_right) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:87: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:87: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
max_pad = max(padding_left, padding_right) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:89: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:89: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if length <= max_pad: @@ -428,11 +428,11 @@ with ``ov.save_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:358: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:358: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. quantized_out = torch.tensor(0.0, device=q_indices.device) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:359: TracerWarning: Iterating over a tensor might cause the trace to be incorrect. Passing a tensor of different shape won't change the number of iterations executed (and might lead to errors or silently give incorrect results). + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:359: TracerWarning: Iterating over a tensor might cause the trace to be incorrect. Passing a tensor of different shape won't change the number of iterations executed (and might lead to errors or silently give incorrect results). for i, indices in enumerate(q_indices): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert (padding_left + padding_right) <= x.shape[-1] diff --git a/docs/notebooks/explainable-ai-1-basic-with-output.rst b/docs/notebooks/explainable-ai-1-basic-with-output.rst index d04827023dc979..1df31312fd752f 100644 --- a/docs/notebooks/explainable-ai-1-basic-with-output.rst +++ b/docs/notebooks/explainable-ai-1-basic-with-output.rst @@ -72,7 +72,9 @@ Guide =2024.2.0" opencv-python tqdm # Install openvino xai package - %pip install -q --no-deps "openvino-xai>=1.0.0" + %pip install -q --no-deps "openvino-xai>=1.1.0" + %pip install -q -U "numpy==1.*" + %pip install -q scipy if platform.system() != "Windows": %pip install -q "matplotlib>=3.4" @@ -128,12 +130,6 @@ Download the Model and data samples else: print(f"{model_name} already downloaded to {base_artifacts_dir}") - -.. parsed-literal:: - - v3-small_224_1.0_float already downloaded to artifacts - - Select inference device ----------------------- @@ -146,15 +142,6 @@ select device from dropdown list for running inference using OpenVINO device = device_widget() device - - - -.. parsed-literal:: - - Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') - - - Load the Model -------------- @@ -187,7 +174,7 @@ Load an Image # Reshape to model input shape. input_image = np.expand_dims(input_image, 0) - plt.imshow(image); + plt.imshow(image) .. parsed-literal:: @@ -196,7 +183,15 @@ Load an Image -.. image:: explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_11_1.png + +.. parsed-literal:: + + + + + + +.. image:: explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_11_2.png Do Inference @@ -218,12 +213,6 @@ Do Inference imagenet_classes = imagenet_filename.read_text().splitlines() - -.. parsed-literal:: - - 'data/imagenet_2012.txt' already exists. - - .. code:: ipython3 # The model description states that for this model, class 0 is a background. @@ -276,23 +265,22 @@ saliency_map}). For classification, targets are indices of the classes. explanation = explainer( data=input_image, targets=result_index, # can be a single target or a container of targets - overlay=True, # saliency map overlay over the input image, defaults to False + label_names=imagenet_classes, # optional, list of label names + overlay=True, # saliency map overlays over the input image, defaults to False ) - plt.imshow(explanation.saliency_map[result_index]) - plt.title(f"Saliency map of the {result_index} class.") - - - - -.. parsed-literal:: - - Text(0.5, 1.0, 'Saliency map of the 206 class.') + explanation.plot() +.. image:: explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_19_0.png -.. image:: explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_19_1.png +Note: by default, overlay is applied over the image in the ``data`` +argument. In this case, ``data`` was preprocessed (e.g. resized to +224x224), but still recognizable by human. 
In order for the overlay to +applied over the original image, provide original image with +``original_image`` argument (please refer to `OpenVINO™ Explainable AI +Toolkit (2/3): Deep Dive `__). Above saliency map can help to answer the question: “Which part of the image mostly contributes to the model predicted class: (206, ‘n02099267 diff --git a/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_11_1.png b/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_11_1.png deleted file mode 100644 index 156c14c9b4af72..00000000000000 --- a/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_11_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:800d793a4cc16c26b283c899a8eab37260ca4711929b45c8206fc124aa75ab99 -size 387941 diff --git a/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_11_2.png b/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_11_2.png new file mode 100644 index 00000000000000..a8fc791b3e4c52 --- /dev/null +++ b/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_11_2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55b1a955ec7a4a7394f905837b1a1686d3bb5130565eb9d4901eade821e6757c +size 387941 diff --git a/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_19_0.png b/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_19_0.png new file mode 100644 index 00000000000000..c6484cb68eeb33 --- /dev/null +++ b/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_19_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26d8ae99fc5c2d7573e5243b1711dab83c7f4658aa423d067ebd17fd87d336c5 +size 351476 diff --git a/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_19_1.png b/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_19_1.png deleted file mode 100644 index 00a5b1e39fb4d5..00000000000000 --- a/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_19_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a6207cc5c8c5fbfe4af1d74ef84cbdc03b86e79ed396b6a28bbb1fe2d9a176f7 -size 242785 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output.rst b/docs/notebooks/explainable-ai-2-deep-dive-with-output.rst index f96778061cc390..4e2ad0970661d2 100644 --- a/docs/notebooks/explainable-ai-2-deep-dive-with-output.rst +++ b/docs/notebooks/explainable-ai-2-deep-dive-with-output.rst @@ -57,31 +57,31 @@ predicting a particular label. 
- `Preprocess image for MobileNet <#preprocess-image-for-mobilenet>`__ -- `Basic usage: Auto mode - explainer <#basic-usage-auto-mode-explainer>`__ +- `Basic usage: Explainer in AUTO + mode <#basic-usage-explainer-in-auto-mode>`__ - - `Create Explainer <#create-explainer>`__ - - `Do explanation <#do-explanation>`__ + - `Create Explainer object <#create-explainer-object>`__ + - `Generate explanation <#generate-explanation>`__ - `Visualize saliency maps <#visualize-saliency-maps>`__ - `Save saliency maps <#save-saliency-maps>`__ - - `Return saliency maps for all - classes <#return-saliency-maps-for-all-classes>`__ + - `Generate saliency maps for all + classes <#generate-saliency-maps-for-all-classes>`__ - `Pre- and post-process functions <#pre--and-post-process-functions>`__ - `Visualization Parameters <#visualization-parameters>`__ -- `White-box explainer <#white-box-explainer>`__ +- `Explainer in WHITEBOX mode <#explainer-in-whitebox-mode>`__ - - `ReciproCAM explain method <#reciprocam-explain-method>`__ + - `ReciproCAM XAI method <#reciprocam-xai-method>`__ - `Insert XAI branch <#insert-xai-branch>`__ - `Insertion-related parameters <#insertion-related-parameters>`__ -- `Black-box explainer <#black-box-explainer>`__ +- `Explainer in BLACKBOX mode <#explainer-in-blackbox-mode>`__ - `Advanced <#advanced>`__ - `Import ImageNet label names and add them to saliency maps <#import-imagenet-label-names-and-add-them-to-saliency-maps>`__ - - `Activation map explain method <#activation-map-explain-method>`__ + - `Activation map XAI method <#activation-map-xai-method>`__ Installation Instructions ~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -110,9 +110,11 @@ Install requirements import platform # Install openvino package - %pip install -q "openvino>=2024.2.0" opencv-python tqdm + %pip install -q "openvino>=2024.2.0" opencv-python tqdm scipy - %pip install -q --no-deps "openvino-xai>=1.0.0" + %pip install -q --no-deps "openvino-xai>=1.1.0" + %pip install -q -U "numpy==1.*" + %pip install -q scipy if platform.system() != "Windows": %pip install -q "matplotlib>=3.4" @@ -175,11 +177,10 @@ converted to IR model from OpenVINO storage. else: print(f"{model_name} already downloaded to {base_artifacts_dir}") +.. code:: ipython3 -.. parsed-literal:: - - v3-small_224_1.0_float already downloaded to artifacts - + # Create ov.Model + model = ov.Core().read_model(model_xml_path) Load the Image ~~~~~~~~~~~~~~ @@ -196,7 +197,7 @@ Load the Image # The MobileNet model expects images in RGB format. image = cv2.cvtColor(cv2.imread(filename=str(image_filename)), code=cv2.COLOR_BGR2RGB) - plt.imshow(image); + plt.imshow(image) .. parsed-literal:: @@ -205,7 +206,15 @@ Load the Image -.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_10_1.png + +.. parsed-literal:: + + + + + + +.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_11_2.png Preprocess image for MobileNet @@ -220,41 +229,41 @@ Preprocess image for MobileNet # Add batch dimension preprocessed_image = np.expand_dims(preprocessed_image, 0) -Basic usage: Auto mode explainer --------------------------------- +Basic usage: ``Explainer`` in ``AUTO`` mode +------------------------------------------- -The easiest way to run the explainer is to do it in Auto mode. Under the -hood of Auto mode, it will first try to run the ``White-Box`` mode. If -this fails, it will then run the ``Black-Box`` mode. 
See more details -about `White-Box <#white-box-explainer>`__ and -`Black-Box <#black-box-explainer>`__ modes below. +The easiest way to generate saliency maps is to use ``Explainer`` in +``ExplainMode.AUTO`` mode (``AUTO`` mode is used by default). -| Generating saliency maps involves model inference. The explainer will - perform model inference, but to do so, it requires ``preprocess_fn`` - and ``postprocess_fn``. -| At this stage, we can avoid passing ``preprocess_fn`` by preprocessing - the data beforehand (e.g., resizing and adding a batch dimension as - shown above). We also don’t pass ``postprocess_fn`` here for - simplicity, since the White-Box mode doesn’t fail on the example - model. +Under the hood of ``AUTO`` mode, ``Explainer`` will first try to run the +``WHITEBOX`` mode. If ``WHITEBOX`` fails, it will then run the +``BLACKBOX`` mode as a fallback option. See more details about +`WHITEBOX <#explainer-in-whitebox-mode>`__ and +`BLACKBOX <#explainer-in-blackbox-mode>`__ modes below. -To learn more about pre- and post-process functions, refer to the `Pre- +Generating saliency maps involves model inference. The explainer will +perform model inference, but to do so, it requires ``preprocess_fn`` and +``postprocess_fn``. We can avoid passing ``preprocess_fn`` by +preprocessing (e.g., resizing and adding a batch dimension as shown +above) the input data beforehand - by default, ``preprocess_fn`` is the +identity function. We expect that current example will successfully use +``WHITEBOX`` mode under the hood, therefore we don’t pass +``postprocess_fn`` (``postprocess_fn`` is not required for ``WHITEBOX`` +mode, only for ``BLACKBOX``). + +To learn more about pre- and post-process functions, refer to the `pre- and post-process functions <#pre--and-post-process-functions>`__ section. -Create Explainer -~~~~~~~~~~~~~~~~ +Create ``Explainer`` object +~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 - # Create ov.Model - model = ov.Core().read_model(model_xml_path) - - # Create explainer object explainer = xai.Explainer( model=model, task=xai.Task.CLASSIFICATION, @@ -269,27 +278,27 @@ Create Explainer INFO:openvino_xai:Explaining the model in white-box mode. -Do explanation -~~~~~~~~~~~~~~ +Generate ``explanation`` +~~~~~~~~~~~~~~~~~~~~~~~~ -The predicted label for this image is ``flat-coated_retriever`` with -label index ``206``. So here and further we will check saliency maps for -this index. +The predicted class for this model-image pair is +``flat-coated_retriever`` with class index ``206``. So here and further +we will check saliency maps for this index. .. code:: ipython3 - # You can choose classes to generate saliency maps for. - # In this notebook we will check maps for predicted class 206 - flat-coated retriever + # You can choose class(es) to generate saliency maps for. + # In this notebook we will check maps for predicted class with index 206 - "flat-coated retriever" retriever_class_index = 206 .. code:: ipython3 explanation = explainer( preprocessed_image, - targets=retriever_class_index, - overlay=True, # False by default + targets=retriever_class_index, # can be a single target or a container of targets + overlay=True, # saliency map overlay over the original image, False by default, set to True for better visual inspection ) Visualize saliency maps @@ -300,16 +309,14 @@ Visualize saliency maps .. code:: ipython3 explanation: Explanation - # Dict[int: np.ndarray] where key - class id, value - processed saliency map e.g. 
354x500x3 - explanation.saliency_map + # explanation.saliency_map: Dict[int: np.ndarray] # where key - class id, value - processed saliency map (e.g. 354 x 500 x 3 shape) # Check saved saliency maps print(f"Saliency maps were generated for the following classes: {explanation.targets}") print(f"Saliency map size: {explanation.shape}") - # Show saliency maps for retriever class - retriever_sal_map = explanation.saliency_map[retriever_class_index] - plt.imshow(retriever_sal_map); + # Visualize generated saliency maps for each target class (.plot() supports plotting multiple saliency maps) + explanation.plot() .. parsed-literal:: @@ -319,7 +326,7 @@ Visualize saliency maps -.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_21_1.png +.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_22_1.png Save saliency maps @@ -330,38 +337,48 @@ Save saliency maps .. code:: ipython3 # Save saliency map - output = base_artifacts_dir / "explain_auto" - explanation.save(output) + explanation.save(base_artifacts_dir, "explain_auto_") .. code:: ipython3 - # See saved saliency maps - image_sal_map = cv2.imread(f"{output}/target_{retriever_class_index}.jpg") + # Plot saved saliency map + image_sal_map = cv2.imread(f"{base_artifacts_dir}/explain_auto_{retriever_class_index}.jpg") image_sal_map = cv2.cvtColor(image_sal_map, cv2.COLOR_BGR2RGB) - plt.imshow(image_sal_map); + plt.imshow(image_sal_map) + + + + +.. parsed-literal:: + -.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_24_0.png -Return saliency maps for all classes -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_25_1.png +Generate saliency maps for all classes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +To obtain saliency maps for all classes, set ``targets`` to ``None`` or +``-1``. .. code:: ipython3 explanation = explainer(preprocessed_image, targets=-1) # Check saved saliency maps - print(f"Saliency maps were generated for the following classes: {explanation.targets}") + print(f"Saliency maps were generated for the following classes: {explanation.targets[:5]} ... {explanation.targets[-5:]}") print(f"Saliency map size: {explanation.shape}") .. 
parsed-literal:: - Saliency maps were generated for the following classes: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 
717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000] + Saliency maps were generated for the following classes: [0, 1, 2, 3, 4] ... [996, 997, 998, 999, 1000] Saliency map size: (224, 224, 3) @@ -379,19 +396,19 @@ constructor. By default, ``preprocess_fn`` is an identity function that passes the input without any changes, assuming it is preprocessed beforehand. -In Auto mode, the explainer tries to run the White-Box mode first. If it -fails, the corresponding exception will be raised, and the Black-Box -mode will be enabled as a fallback. +In ``AUTO`` mode, the explainer tries to run the ``WHITEBOX`` mode +first. If it fails, the corresponding exception will be raised, and the +``BLACKBOX`` mode will be enabled as a fallback. -The Black-Box mode requires access to the output ``logits`` (activated -or not). Therefore, in such cases, ``postprocess_fn`` is required, which -accepts the raw IR model output and returns logits (see below for a -reference). +The ``BLACKBOX`` mode requires access to the output ``logits`` +(activated or not). Therefore, in such cases, ``postprocess_fn`` is +required, which accepts the raw IR model output and returns ``logits`` +(see below for a reference). .. code:: ipython3 def preprocess_fn(x: np.ndarray) -> np.ndarray: - # Implementing own pre-process function based on model's implementation + # Implementing pre-processing based on model's pipeline x = cv2.resize(src=x, dsize=(224, 224)) # Add batch dimension @@ -400,7 +417,7 @@ reference). 
def postprocess_fn(x: OVDict): - # Implementing own post-process function based on model's implementation + # Implementing post-processing function based on model's pipeline # Return "logits" model output return x[0] @@ -447,7 +464,7 @@ Visualization Parameters # Create explainer object explainer = xai.Explainer(model=model, task=xai.Task.CLASSIFICATION) - # Return overlayed image + # Generate overlayed saliency_map explanation = explainer( preprocessed_image, targets=[retriever_class_index], # target can be a single label index, label name or a list of indices/names @@ -455,12 +472,10 @@ Visualization Parameters original_input_image=image, # to apply overlay on the original image instead of preprocessed one that was used for the explainer ) - retriever_sal_map = explanation.saliency_map[retriever_class_index] - plt.imshow(retriever_sal_map) + explanation.plot() # Save saliency map - output = base_artifacts_dir / "overlay" - explanation.save(output) + explanation.save(base_artifacts_dir, "overlay_") .. parsed-literal:: @@ -472,33 +487,31 @@ Visualization Parameters -.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_32_1.png +.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_34_1.png .. code:: ipython3 - # Return low-resolution saliency map + # Generate saliency map without overlay over original image explanation = explainer( preprocessed_image, targets=[retriever_class_index], # target can be a single label index, label name or a list of indices/names overlay=False, # False by default ) - retriever_sal_map = explanation.saliency_map[retriever_class_index] - plt.imshow(retriever_sal_map) + explanation.plot() # Save saliency map - output = base_artifacts_dir / "colormap" - explanation.save(output) + explanation.save(base_artifacts_dir, "colormap_") -.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_33_0.png +.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_35_0.png .. code:: ipython3 - # Return low-resolution gray-scale saliency map + # Return low-resolution (raw) gray-scale saliency map explanation = explainer( preprocessed_image, targets=[retriever_class_index], # target can be a single label index, label name or a list of indices/names @@ -506,37 +519,37 @@ Visualization Parameters colormap=False, # True by default ) - retriever_sal_map = explanation.saliency_map[retriever_class_index] - plt.imshow(retriever_sal_map, cmap="gray") + explanation.plot() # Save saliency map - output = base_artifacts_dir / "grayscale" - explanation.save(output) + explanation.save(base_artifacts_dir, "grayscale_") -.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_34_0.png +.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_36_0.png -White-Box explainer -------------------- +``Explainer`` in ``WHITEBOX`` mode +---------------------------------- -ReciproCAM explain method +``ReciproCAM`` XAI method ~~~~~~~~~~~~~~~~~~~~~~~~~ -The White-Box explainer treats the model as a white box and needs to -make inner modifications. It adds extra XAI nodes after the backbone to -estimate which activations are important for model prediction. +``Explainer`` in ``WHITEBOX`` mode treats the model as a white box and +performs its inner modifications. 
``Explainer`` inserts extra XAI nodes +after the backbone to estimate which activations are important for model +prediction. If a method is not specified, the XAI branch will be generated using the `ReciproCAM `__ method. By default, the insertion of the XAI branch will be done automatically -by searching for the correct node. +by searching for the correct node - ``target_layer`` (``target_layer`` +can be specified manually). It works quickly and precisely, requiring only one model inference. @@ -547,10 +560,8 @@ It works quickly and precisely, requiring only one model inference. model=model, task=xai.Task.CLASSIFICATION, preprocess_fn=preprocess_fn, - # defaults to ExplainMode.AUTO - explain_mode=ExplainMode.WHITEBOX, - # ReciproCAM is the default XAI method for CNNs - explain_method=xai.Method.RECIPROCAM, + explain_mode=ExplainMode.WHITEBOX, # defaults to ExplainMode.AUTO + explain_method=xai.Method.RECIPROCAM, # ReciproCAM is the default white-box method for CNNs ) @@ -586,7 +597,7 @@ environment. model, task=xai.Task.CLASSIFICATION, explain_method=xai.Method.RECIPROCAM, - target_layer="MobilenetV3/Conv_1/Conv2D", # MobileNet V3 + target_layer="MobilenetV3/Conv_1/Conv2D", # optional, by default insert_xai will try to find target_layer automatically embed_scaling=True, ) @@ -598,23 +609,28 @@ environment. INFO:openvino_xai:Insertion of the XAI branch into the model was successful. +**Note**: ``insert_xai`` supports both OpenVINO IR and PyTorch models. +See documentation for more details. + Insertion-related parameters ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If automatic search for correct node fails, you can set up a correct -node manually with ``target_layer`` argument. For classification it’s +node manually with ``target_layer`` argument. For classification, it’s the last backbone node with shape [1, num_channels, feature_map_height, -feature_map_width]. For example, for MobileNetV3 it will be +feature_map_width]. For example, for the used MobileNetV3 it will be ``MobilenetV3/Conv_1/Conv2D`` layer with [1, 576, 7, 7] output shape. To find the right ``target_layer`` for your model, check the name of the -last convolutional layer in the backbone using ``.XML`` model. +last convolutional node in the backbone using ``.XML`` file (optionally, +use some graph visualization tool, such as Netron). ``embed_scaling`` **default True** (for speed purposes), this parameter -adds normalization to the XAI branch, which results in being able to -visualize saliency maps right away without further postprocessing. +ensures that saliency map scaling is embedded into the graph, which +results in being able to visualize saliency maps right away without +further postprocessing. .. code:: ipython3 @@ -638,25 +654,32 @@ visualize saliency maps right away without further postprocessing. INFO:openvino_xai:Explaining the model in white-box mode. -Black-Box explainer -------------------- - +``Explainer`` in ``BLACKBOX`` mode +---------------------------------- -The Black-Box method treats the model as a black box without altering -its structure. Therefore, this method will work on any model that can be -inferred and return class probabilities as output. -The `RISE `__ algorithm used in -Black-Box mode applies random masks to hide parts of the image, -retrieves the resulting class probabilities, and uses this information -to calculate the “importance” of each part of the image for the final -results. After performing thousands of inferences, a summarized saliency -map is generated. 
+``Explainer`` in ``BLACKBOX`` mode treats the model as a black box +without altering its internal structure. Therefore, this method will +work on any model that can be inferred and return class scores as +output. While it is convenient to treat every model as a black box for -explanation purposes, this algorithm may require a large number of -inferences (defaulting to 5000) to generate a high-quality saliency map. +explanation purposes, black-box method may require a significant number +of inferences (AISE requires 120-500 model inferences). + +Given that the quality of the saliency maps usually correlates with the +number of available inferences, we propose the following presets for the +black-box methods: ``Preset.SPEED``, ``Preset.BALANCE``, +``Preset.QUALITY`` (``Preset.BALANCE`` is used by default). + +AISE (Adaptive Input Sampling for Explanation of Black-box Models) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +AISE is used as a default black-box method. AISE formulates saliency map +generation as a kernel density estimation (KDE) problem, and adaptively +sample input masks using a derivative-free optimizer to maximize mask +saliency score. .. code:: ipython3 @@ -673,45 +696,69 @@ inferences (defaulting to 5000) to generate a high-quality saliency map. explanation = explainer( image, targets=retriever_class_index, - # targets=-1, # Explain all classes overlay=True, - num_masks=1000, # kwargs of the RISE algo ) .. parsed-literal:: INFO:openvino_xai:Explaining the model in black-box mode. - Explaining in synchronous mode: 100%|██████████| 1000/1000 [00:03<00:00, 259.73it/s] .. code:: ipython3 + # Plot saliency map + explanation.plot() + # Save saliency map - output = base_artifacts_dir / "blackbox_explain" - explanation.save(output) + explanation.save(base_artifacts_dir, "blackbox_aise_") + + + +.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_49_0.png + + +RISE (Randomized Input Sampling for Explanation of Black-box Models) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +`RISE `__ probes a model by +sub-sampling the input image via random masks and records its response +to each of them. RISE creates random masks from down-scaled space +(e.g. 7×7 grid) and adds random translation shifts for the pixel-level +explanation with further up-sampling. Weighted sum of all sampled masks +used to generate the fine-grained saliency map. + +.. code:: ipython3 + + # Create explainer object + explainer = xai.Explainer( + model=model, + task=xai.Task.CLASSIFICATION, + preprocess_fn=preprocess_fn, + postprocess_fn=postprocess_fn, + explain_mode=ExplainMode.BLACKBOX, # defaults to AUTO + explain_method=xai.Method.RISE, # xai.Method.AISE is used by default + ) - # See saved saliency maps - image_sal_map = cv2.imread(f"{output}/target_{retriever_class_index}.jpg") - image_sal_map = cv2.cvtColor(image_sal_map, cv2.COLOR_BGR2RGB) - plt.imshow(image_sal_map); + # Generate explanation + explanation = explainer( + image, + targets=retriever_class_index, + overlay=True, + ) +.. code:: ipython3 + # Plot saliency map + explanation.plot() + + # Save saliency map + explanation.save(base_artifacts_dir, "blackbox_rise_") -.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_45_0.png -For the ``Black-Box explainer``, the number of masks and cells is -crucial for achieving good results. 
In the example above, it’s evident -that the number of masks was insufficient to create a high-quality map. +.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_52_0.png -Varying the ``num_cells`` and ``num_masks`` parameters can achieve -different goals: - To speed up the explanation, you can reduce the -number of ``num_masks``. However, this will decrease the quality of the -resulting saliency maps, making it suitable for large and focused -objects. - Increasing ``num_cells`` provides a more fine-grained result, -but it requires a larger ``num_masks`` to converge. This approach is -more effective for classes with complex shapes. Advanced -------- @@ -724,9 +771,8 @@ Import ImageNet label names and add them to saliency maps If ``label_names`` are not provided to the explainer call, the saved -saliency map will have the predicted class index, not the name. For -example, ``image_name_target_206.jpg`` instead of -``image_name_target_retriever.jpg``. +saliency map will have the predicted class index, not the label name. +For example, ``206.jpg`` instead of ``retriever.jpg``. To conveniently view label names in saliency maps, we provide ImageNet label names information to the explanation call. @@ -781,8 +827,8 @@ label names information to the explanation call. # Adding ImageNet label names. explanation = explainer( image, - # Return saliency maps for 2 named labels - targets=["flat-coated_retriever", "microwave"], # Also label indices [206, 652] are possible as target + # Return saliency maps for 2 named labels, possible if label_names is provided + targets=["flat-coated_retriever", "microwave"], # slso label indices [206, 652] are possible as target label_names=imagenet_labels, ) @@ -797,8 +843,7 @@ label names information to the explanation call. .. code:: ipython3 # Save saliency map - output = base_artifacts_dir / "label_names" - explanation.save(output) + explanation.save(base_artifacts_dir, "label_names_") Below in ``base_artifacts_dir / "label_names"`` you can see saved saliency maps with label name on it: @@ -806,18 +851,18 @@ saliency maps with label name on it: .. code:: ipython3 # See saliency mas saved in `output` with predicted label in image name - for file_name in output.glob("*"): + for file_name in base_artifacts_dir.glob("label_names_*"): print(file_name) .. parsed-literal:: - artifacts/label_names/target_microwave.jpg - artifacts/label_names/target_flat-coated_retriever.jpg + artifacts/label_names_microwave.jpg + artifacts/label_names_flat-coated_retriever.jpg -Activation map explain method -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Activation map XAI method +~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -840,11 +885,8 @@ saliency maps for each class, the activation map is returned as explain_method=xai.Method.ACTIVATIONMAP, ) - explanation = explainer(image, targets=-1, overlay=True) - activation_map = explanation.saliency_map["per_image_map"] - - plt.imshow(activation_map) - plt.show() + explanation = explainer(image, overlay=True) + explanation.plot() .. parsed-literal:: @@ -855,5 +897,5 @@ saliency maps for each class, the activation map is returned as -.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_57_1.png +.. 
image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_63_1.png diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_10_1.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_10_1.png deleted file mode 100644 index e5049a64f6dbef..00000000000000 --- a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_10_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e52cd26d8672300419d11ceda43b756f44961da4d0ed4ba1b907eb5223d4c546 -size 387941 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_11_2.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_11_2.png new file mode 100644 index 00000000000000..a8fc791b3e4c52 --- /dev/null +++ b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_11_2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55b1a955ec7a4a7394f905837b1a1686d3bb5130565eb9d4901eade821e6757c +size 387941 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_21_1.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_21_1.png deleted file mode 100644 index b1b2f91a1c2a7d..00000000000000 --- a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_21_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ca8b8d022b4ebab307caa62d5de9be5553ee84492bcc1709a8267a3aba8f2374 -size 237061 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_22_1.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_22_1.png new file mode 100644 index 00000000000000..8822fe615f6f19 --- /dev/null +++ b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_22_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83b40d8eb0d4a89c7c24a9cc676e3b4f298e5eabdb7a9d5a5604d4c8533dca7f +size 342254 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_24_0.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_24_0.png deleted file mode 100644 index 4e780c1f4cba88..00000000000000 --- a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_24_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:26893793cf6015fdf551d8554731e41aad9a436bfe2df0fcdbf469b20d25eb22 -size 233040 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_25_1.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_25_1.png new file mode 100644 index 00000000000000..125556adbb530c --- /dev/null +++ b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_25_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:354fa33f0a0e6becba16a5c65dde256282215e75a1e379500fd9e9d5fed7845e +size 235673 diff --git 
a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_32_1.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_32_1.png deleted file mode 100644 index 0f9120d6ab8b9d..00000000000000 --- a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_32_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2fd585e98c3b12072d9d2fd2b24f3e46e946f4a3771b10a7b987e3e126b218fb -size 336183 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_33_0.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_33_0.png deleted file mode 100644 index 541cb8c169552e..00000000000000 --- a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_33_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2ae15d3b00ae8e36c86ffce352d92f24c370ca2948b53c06bc2bb9a9d3e73356 -size 51371 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_34_0.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_34_0.png deleted file mode 100644 index f42dca24596405..00000000000000 --- a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_34_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:90c7bf014e9c04a3ed78cf6965ed82ae371fc7c4a18fd717190e1e5561f0dc0a -size 6162 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_34_1.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_34_1.png new file mode 100644 index 00000000000000..104fcabc090172 --- /dev/null +++ b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_34_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14efddd662af5ecae15f38d9fa20e001d7c1f1f26418d3a89ea0f489a5aee993 +size 312661 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_35_0.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_35_0.png new file mode 100644 index 00000000000000..60fdb91b059005 --- /dev/null +++ b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_35_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cae625a36aeb18c1fd5f5b7e673b7e58836f8bf914b43906cb4e5c81cb33885f +size 62966 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_36_0.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_36_0.png new file mode 100644 index 00000000000000..1c0f2dbbeb4f3a --- /dev/null +++ b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_36_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2596ddca7816d246bea422e0c9b809a41feaf65163d6c27c0b422ba6f16a440 +size 4947 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_45_0.png 
b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_45_0.png deleted file mode 100644 index 6ed4fcc4d48282..00000000000000 --- a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_45_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2fd21924f3fd3631e547d7b0254a148df812f50da4c784bdc4357ad8635e4cd7 -size 354123 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_49_0.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_49_0.png new file mode 100644 index 00000000000000..2c5b7a96ca9399 --- /dev/null +++ b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_49_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0d33ea52b17c068fb6e2bff4ad0d4f0993e38a6a074a3ac1af3ccaefec2199d +size 326076 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_52_0.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_52_0.png new file mode 100644 index 00000000000000..6fa958fe614823 --- /dev/null +++ b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_52_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d41ea03631f0daa7400793e57138b4af52e13dc5294a3440688dd27a5034215e +size 324115 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_57_1.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_57_1.png deleted file mode 100644 index 3f0c1df4b7e57c..00000000000000 --- a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_57_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4b654a427e48742a5d20a15cf89fb3c7667123ce981c6e719299b42931e696e0 -size 336612 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_63_1.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_63_1.png new file mode 100644 index 00000000000000..98255daa2893ee --- /dev/null +++ b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_63_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e60ccfb5582f57e813fbcd3ee55c35677f3cb391e7a4eb67b10319ee911f341 +size 314537 diff --git a/docs/notebooks/explainable-ai-3-map-interpretation-with-output.rst b/docs/notebooks/explainable-ai-3-map-interpretation-with-output.rst index 4a76c94a411d17..537ae36f6a331c 100644 --- a/docs/notebooks/explainable-ai-3-map-interpretation-with-output.rst +++ b/docs/notebooks/explainable-ai-3-map-interpretation-with-output.rst @@ -57,12 +57,11 @@ low-confident prediction, and wrong prediction. 
- `Select inference device <#select-inference-device>`__ - `Load the Model <#load-the-model>`__ - - `Define preprocess_fn and - postprocess_fn <#define-preprocess_fn-and-postprocess_fn>`__ + - `Define preprocess_fn <#define-preprocess_fn>`__ - `Explain <#explain>`__ - - `Create explainer <#create-explainer>`__ + - `Create Explainer object <#create-explainer-object>`__ - `Import ImageNet label names <#import-imagenet-label-names>`__ - `Explain using ImageNet labels <#explain-using-imagenet-labels>`__ @@ -110,7 +109,11 @@ Install requirements # Install openvino package %pip install -q "openvino>=2024.2.0" opencv-python tqdm - %pip install -q --no-deps "openvino-xai>=1.0.0" + + # Install openvino xai package + %pip install -q --no-deps "openvino-xai>=1.1.0" + %pip install -q -U "numpy==1.*" + %pip install -q scipy if platform.system() != "Windows": %pip install -q "matplotlib>=3.4" @@ -184,12 +187,6 @@ classify since they’re all dog breeds. image_folder_path = data_folder / "imagewoof320" / "imagewoof320" - -.. parsed-literal:: - - Dataset is already downloaded to artifacts and extracted. - - .. code:: ipython3 # Create list of images to explain @@ -236,12 +233,6 @@ scaling and normalization with certain values. else: print(f"{model_name} already downloaded to {base_artifacts_dir}") - -.. parsed-literal:: - - mobilenetv3_large_100.ra_in1k already downloaded to artifacts - - Prepare model to run inference ------------------------------ @@ -260,15 +251,6 @@ select device from dropdown list for running inference using OpenVINO device - - - -.. parsed-literal:: - - Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') - - - Load the Model ~~~~~~~~~~~~~~ @@ -281,17 +263,17 @@ Load the Model model = core.read_model(model=model_xml_path) compiled_model = core.compile_model(model=model, device_name=device.value) -Define preprocess_fn and postprocess_fn -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Define ``preprocess_fn`` +~~~~~~~~~~~~~~~~~~~~~~~~ -To run model inference, you need to define functions to preprocess data -and postprocess the results based on the model’s implementation. Since -the used model is originally from `timm -storage `__, we -need to apply specific timm preprocessing, including normalization and -scaling with certain values. +This notebook uses ``WHITEBOX`` mode for model explanation, so it is +required to define a function to preprocess data (the alternative is to +preprocess the input data beforehand). Since the used model is originally from `timm +storage `__, it is +required to apply specific timm preprocessing, including normalization +and scaling with certain values. .. code:: ipython3 @@ -312,40 +294,22 @@ scaling with certain values. # Add batch dimension x = np.expand_dims(x, 0) return x - - - def postprocess_fn(x: np.ndarray) -> np.ndarray: - """ - Process model prediction - """ - prediction_processed = softmax(x) - # Remove batch dimention - return prediction_processed[0] - - - def softmax(x): - """Compute softmax values of x.""" - e_x = np.exp(x - np.max(x)) - return e_x / e_x.sum() Explain ------- -Create explainer -~~~~~~~~~~~~~~~~ - - +Create ``Explainer`` object +~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The ``explainer`` can internally apply pre-processing during model -inference, allowing raw images as input. -To enable this, define ``preprocess_fn`` and provide it to the explainer -constructor. In cases where we pass multiple unprocessed images, as done -in this notebook, we need to define ``preprocess_fn``.
-If it’s not defined, it is assumed that the input is preprocessed. +The ``Explainer`` object can internally apply pre-processing during +model inference, allowing raw images as input. To enable this, define +``preprocess_fn`` and provide it to the explainer constructor. If +``preprocess_fn`` is not defined, it is assumed that the input is +preprocessed. .. code:: ipython3 @@ -375,11 +339,10 @@ Import ImageNet label names If ``label_names`` are not provided to the explainer call, the saved saliency map will have the predicted class index, not the name. For -example, ``image_name_target_167.jpg`` instead of -``image_name_target_English_foxhound.jpg``. +example, ``167.jpg`` instead of ``English_foxhound.jpg``. -To conveniently view label names in saliency maps, we provide ImageNet -label names information to the explanation call. +To conveniently view label names in saliency maps, we prepare and +provide ImageNet label names information to the explanation call. .. code:: ipython3 @@ -430,10 +393,10 @@ to the explainer. targets=[ "flat-coated_retriever", "Samoyed", - ], # Also label indices [206, 258] are possible as target + ], # also label indices [206, 258] are possible as target label_names=imagenet_labels, ) - explanation.save(output, Path(image_path).stem) + explanation.save(output, f"{Path(image_path).stem}_") # pass prefix name with underscore Below in ``base_artifacts_dir / "saliency_maps" / "multiple_images"`` you can see saved saliency maps: @@ -447,8 +410,8 @@ you can see saved saliency maps: .. parsed-literal:: - artifacts/saliency_maps/multiple_images/n02105641_2491_target_flat-coated_retriever.jpg - artifacts/saliency_maps/multiple_images/n02105641_2491_target_Samoyed.jpg + artifacts/saliency_maps/multiple_images/n02088364_5768_Samoyed.jpg + artifacts/saliency_maps/multiple_images/n02088364_5768_flat-coated_retriever.jpg Notable use cases in ImageWoof dataset @@ -544,6 +507,20 @@ The cell below contains paths to images with those respective use cases: print(f"Predicted class {imagenet_labels[index]}, index {index}, probability: {score:.2f}") return result_infer, result_idxs, result_scores + + + def postprocess_fn(x: np.ndarray) -> np.ndarray: + """ + Process model prediction + """ + prediction_processed = softmax(x) + return prediction_processed[0] # remove batch dimension + + + def softmax(x): + """Compute softmax values of x.""" + e_x = np.exp(x - np.max(x)) + return e_x / e_x.sum() Explain for each use case ~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -573,22 +550,22 @@ Explain for each use case explanation = explainer( image, - targets=result_idxs, # Return saliency maps for predicted classes + targets=result_idxs, # return saliency maps for predicted classes label_names=imagenet_labels, overlay=True, ) - # Save saliency maps, use detailed implementation instead of `explanation.save` - # to return predicted scores for saliency maps as well + saliency_map_name_prefix = f"{image_name}_{gt_info}_pr_" + saliency_map_name_postfix = "_" + confidence_scores = {} for idx, score in zip(result_idxs, result_scores): - target_name = imagenet_labels[idx] - cv2.imwrite( - os.path.join( - output / use_case, - f"{image_name}_{gt_info}_pr_{target_name}_{score:.2f}.jpg", - ), - img=explanation.saliency_map[idx], - ) + confidence_scores[idx] = score + explanation.save( + dir_path=(output / use_case), + prefix=saliency_map_name_prefix, + postfix=saliency_map_name_postfix, + confidence_scores=confidence_scores, + ) .. 
parsed-literal:: @@ -628,30 +605,30 @@ Explain for each use case True_positive_high_confidence - n02111889_17737_gt_Samoyed_0.94_pr_Samoyed_0.94 - n02099601_6505_gt_golden retriever_0.88_pr_golden_retriever_0.88 n02088364_2019_gt_beagle_0.97_pr_beagle_0.97 + n02099601_6505_gt_golden retriever_0.88_pr_golden_retriever_0.88 n02105641_817_gt_Old English sheepdog_0.96_pr_Old_English_sheepdog_0.96 + n02111889_17737_gt_Samoyed_0.94_pr_Samoyed_0.94 True_positive_low_confidence - n02086240_1422_gt_Shih-Tzu_0.18_pr_Shih-Tzu_0.18 - n02086240_3709_gt_Shih-Tzu_0.20_pr_Shih-Tzu_0.20 n02099601_7942_gt_golden retriever_0.18_pr_golden_retriever_0.18 + n02086240_3709_gt_Shih-Tzu_0.20_pr_Shih-Tzu_0.20 + n02086240_1422_gt_Shih-Tzu_0.18_pr_Shih-Tzu_0.18 n02086240_1765_gt_Shih-Tzu_0.18_pr_Shih-Tzu_0.18 False_positive_high_confidence n02088364_12304_gt_beagle_0.01_pr_car_mirror_0.82 - n02111889_14926_gt_Samoyed_0.03_pr_Arctic_fox_0.95 - n02111889_1931_gt_Samoyed_0.07_pr_dogsled_0.79 - n02115641_5752_gt_dingo_0.02_pr_Chihuahua_0.93 + n02088364_2430_gt_beagle_0.00_pr_bannister_0.78 n02099601_4933_gt_golden retriever_0.05_pr_bubble_0.79 n02096294_2323_gt_Australian terrier_0.00_pr_quilt_0.80 - n02088364_2430_gt_beagle_0.00_pr_bannister_0.78 + n02115641_5752_gt_dingo_0.02_pr_Chihuahua_0.93 + n02111889_1931_gt_Samoyed_0.07_pr_dogsled_0.79 n02087394_6357_gt_Rhodesian ridgeback_0.00_pr_dalmatian_0.98 + n02111889_14926_gt_Samoyed_0.03_pr_Arctic_fox_0.95 True_positive_two_predictions - n02111889_374_gt_Samoyed_0.43_pr_Samoyed_0.43 n02099601_634_gt_golden retriever_0.30_pr_golden_retriever_0.30 + n02111889_374_gt_Samoyed_0.43_pr_Samoyed_0.43 n02099601_634_gt_golden retriever_0.30_pr_Labrador_retriever_0.57 n02111889_374_gt_Samoyed_0.43_pr_crib_0.39 @@ -693,6 +670,7 @@ of pictures, their names, and the confidence of predictions: for image_path, ax in zip(image_paths, axs): image_sal_map = cv2.imread(f"{use_case_output_dir}/{image_path}") + image_sal_map = cv2.cvtColor(image_sal_map, cv2.COLOR_BGR2RGB) image_name = Path(image_path).stem image_name = image_name.replace("_target", "") diff --git a/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_50_0.png b/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_50_0.png index f149c148287d67..1a9f33c3368b17 100644 --- a/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_50_0.png +++ b/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_50_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ff5a5cd559b24fdfae9eb238438d8e1e65be9c5878d2d8a48551038a2175dd90 -size 935667 +oid sha256:cbb403d4ab869af3d0d82a3a7980192f2da68c7df2fe27ebd34340465592f46e +size 974504 diff --git a/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_53_0.png b/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_53_0.png index 281a37e1ff476e..62018be36e4c3a 100644 --- a/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_53_0.png +++ b/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_53_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:056291c27310ffbff98af36276ead5fa216a04e6f55507e0559a86e277783499 -size 893683 +oid sha256:ad8213f746e218068621812a53f4ec4337fb1d0f8fcc763566e5f9e4251cbcb2 +size 917046 diff --git a/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_56_0.png b/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_56_0.png index 0a23b9ac833d18..40b6043e87691d 100644 --- a/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_56_0.png +++ b/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_56_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5b05bbfa7b014ab362a42ba8cf97c4604fd486b43b76e365802681fb3c2678d6 -size 673321 +oid sha256:12061dc812cec3219863c5936441baa8ce5b86015acf978ca3d4dcf1212b9c02 +size 681815 diff --git a/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_59_0.png b/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_59_0.png index 5ca94c55195397..a04779fd42ed48 100644 --- a/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_59_0.png +++ b/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_59_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e61cbfbf2fbac5b143f72a31d5de6ecabb6c6e705995129966becbb09bfa0cec -size 715492 +oid sha256:3f48b01dde2edffcea65f08fafe13ff158314f7e75534bbc5d7af027cbc43f5e +size 746506 diff --git a/docs/notebooks/fast-segment-anything-with-output.rst b/docs/notebooks/fast-segment-anything-with-output.rst index 65bd0c194a8116..c29a58c33e3490 100644 --- a/docs/notebooks/fast-segment-anything-with-output.rst +++ b/docs/notebooks/fast-segment-anything-with-output.rst @@ -77,8 +77,8 @@ Install requirements .. code:: ipython3 - %pip install -q "ultralytics==8.2.24" "onnx<1.16.2" tqdm --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "openvino-dev>=2024.0.0" + %pip install -q "ultralytics==8.2.24" "matplotlib>=3.4" "onnx<1.16.2" tqdm --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "openvino>=2024.4.0" %pip install -q "nncf>=2.9.0" %pip install -q "gradio>=4.13" @@ -158,7 +158,7 @@ model and generate a segmentation map. .. parsed-literal:: - 100%|██████████| 138M/138M [00:02<00:00, 67.6MB/s] + 100%|██████████| 138M/138M [00:02<00:00, 56.5MB/s] @@ -170,8 +170,8 @@ model and generate a segmentation map. .. parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 768x1024 37 objects, 612.7ms - Speed: 3.0ms preprocess, 612.7ms inference, 794.5ms postprocess per image at shape (1, 3, 768, 1024) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 768x1024 37 objects, 662.7ms + Speed: 3.8ms preprocess, 662.7ms inference, 766.0ms postprocess per image at shape (1, 3, 768, 1024) The model returns segmentation maps for all the objects on the image. @@ -214,10 +214,10 @@ tracing. 
The FastSAM model itself is based on YOLOv8 model. PyTorch: starting from 'FastSAM-x.pt' with input shape (1, 3, 1024, 1024) BCHW and output shape(s) ((1, 37, 21504), (1, 32, 256, 256)) (138.3 MB) OpenVINO: starting export with openvino 2024.4.0-16579-c3152d32c9c-releases/2024/4... - OpenVINO: export success ✅ 6.0s, saved as 'FastSAM-x_openvino_model/' (276.1 MB) + OpenVINO: export success ✅ 6.2s, saved as 'FastSAM-x_openvino_model/' (276.1 MB) - Export complete (9.0s) - Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything + Export complete (9.2s) + Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything Predict: yolo predict task=segment model=FastSAM-x_openvino_model imgsz=1024 Validate: yolo val task=segment model=FastSAM-x_openvino_model imgsz=1024 data=ultralytics/datasets/sa.yaml Visualize: https://netron.app @@ -321,8 +321,8 @@ pipeline. .. parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 1024x1024 42 objects, 498.1ms - Speed: 5.7ms preprocess, 498.1ms inference, 31.2ms postprocess per image at shape (1, 3, 1024, 1024) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 1024x1024 42 objects, 558.8ms + Speed: 5.7ms preprocess, 558.8ms inference, 34.6ms postprocess per image at shape (1, 3, 1024, 1024) One can observe the converted model outputs in the next cell, they is @@ -615,8 +615,8 @@ calibration dataset to measure the performance. .. parsed-literal:: - Segmented in 70 seconds. - Resulting in 1.83 fps + Segmented in 69 seconds. + Resulting in 1.86 fps .. code:: ipython3 @@ -643,9 +643,9 @@ calibration dataset to measure the performance. .. parsed-literal:: - Segmented in 21 seconds - Resulting in 6.1 fps - That is 3.33 times faster! + Segmented in 23 seconds + Resulting in 5.57 fps + That is 3.0 times faster! Try out the converted pipeline diff --git a/docs/notebooks/florence2-with-output.rst b/docs/notebooks/florence2-with-output.rst index a09f2a1ea60399..4cca2d85a2bd6c 100644 --- a/docs/notebooks/florence2-with-output.rst +++ b/docs/notebooks/florence2-with-output.rst @@ -51,20 +51,12 @@ Prerequisites .. code:: ipython3 - import platform - - %pip install -q "openvino>=2024.3.0" "einops" "torch>2.1" "torchvision" "timm>=0.9.8" "transformers>=4.41" "pillow" "gradio>=4.19" --extra-index-url https://download.pytorch.org/whl/cpu - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "openvino>=2024.3.0" "einops" "torch>2.1" "torchvision" "matplotlib>=3.4" "timm>=0.9.8" "transformers>=4.41" "pillow" "gradio>=4.19" --extra-index-url https://download.pytorch.org/whl/cpu .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. .. code:: ipython3 @@ -108,10 +100,10 @@ available model. By default, we will use .. parsed-literal:: - 2024-10-08 02:10:50.200273: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. 
You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 02:10:50.234398: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-23 01:17:38.434215: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-23 01:17:38.467940: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 02:10:50.883345: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-23 01:17:39.118965: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -201,19 +193,19 @@ pipeline. .. parsed-literal:: - config.json: 0%| | 0.00/2.43k [00:00 1 or self.sliding_window is not None: /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/chkpt/modeling_florence2.py:1205: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! is_causal = True if self.is_causal and attention_mask is None and tgt_len > 1 else False diff --git a/docs/notebooks/flux.1-image-generation-with-output.rst b/docs/notebooks/flux.1-image-generation-with-output.rst index 09a05c0e73bf8d..62549bd074d4a6 100644 --- a/docs/notebooks/flux.1-image-generation-with-output.rst +++ b/docs/notebooks/flux.1-image-generation-with-output.rst @@ -51,14 +51,6 @@ Prerequisites %pip install -q "sentencepiece" "protobuf" %pip install -qU "openvino>=2024.4.0" - -.. parsed-literal:: - - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - - .. code:: ipython3 import requests @@ -116,11 +108,17 @@ FLUX.1-dev version using widget bellow. .. parsed-literal:: - 2024-10-08 02:11:42.908018: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 02:11:42.941481: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 02:11:43.614104: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino + + +.. 
parsed-literal:: + 2024-08-13 17:30:13.543036: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-08-13 17:30:13.544738: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used. + 2024-08-13 17:30:13.579013: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2024-08-13 17:30:14.449873: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + @@ -187,71 +185,10 @@ and convert each part of pipeline using ``ov.convert_model``. model_dir = convert_flux(model_selector.value) - -.. parsed-literal:: - - Loading pipeline components...: 0%| | 0/7 [00:00 1 or self.sliding_window is not None: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if past_key_values_length > 0: - - -.. parsed-literal:: - - ✅ Clip Text encoder conversion finished - ⌛ T5 Text encoder conversion started - ✅ T5 Text encoder conversion finished - ⌛ VAE decoder conversion started - - .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if hidden_states.shape[0] >= 64: - - -.. parsed-literal:: - - ✅ VAE decoder onversion finished - ✅ black-forest-labs/FLUX.1-schnell successfully converted and can be found in FLUX.1-schnell - + ✅ black-forest-labs/FLUX.1-schnell model already converted and can be found in FLUX.1-schnell + .. code:: ipython3 @@ -331,136 +268,11 @@ compression. .. parsed-literal:: - INFO:nncf:NNCF initialized successfully. 
Supported frameworks detected: torch, tensorflow, onnx, openvino - ⌛ transformer compression started - Compression parameters: - mode = int4_sym - ratio = 1.0 - group_size = 64 - INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 0% (1 / 502) │ 0% (0 / 501) │ - ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ 4 │ 100% (501 / 502) │ 100% (501 / 501) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ - - - -.. parsed-literal:: - - Output() - - - - - - - - - -.. parsed-literal:: - - ✅ transformer compression finished Compressed transformer can be found in FLUX.1-schnell/transformer/transformer_int4.xml - ⌛ text_encoder compression started - Compression parameters: - mode = int4_sym - ratio = 1.0 - group_size = 64 - INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 33% (3 / 74) │ 0% (0 / 71) │ - ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ 4 │ 67% (71 / 74) │ 100% (71 / 71) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ - - - -.. parsed-literal:: - - Output() - - - - - - - - - -.. parsed-literal:: - - ✅ text_encoder compression finished Compressed text_encoder can be found in FLUX.1-schnell/text_encoder/text_encoder_int4.xml - ⌛ text_encoder_2 compression started - Compression parameters: - mode = int4_sym - ratio = 1.0 - group_size = 64 - INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 4% (3 / 170) │ 0% (0 / 167) │ - ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ 4 │ 96% (167 / 170) │ 100% (167 / 167) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ - - - -.. parsed-literal:: - - Output() - - - - - - - - - -.. parsed-literal:: - - ✅ text_encoder_2 compression finished Compressed text_encoder_2 can be found in FLUX.1-schnell/text_encoder_2/text_encoder_2_int4.xml - ⌛ vae_decoder compression started - Compression parameters: - mode = int4_sym - ratio = 1.0 - group_size = 64 - INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 98% (36 / 39) │ 0% (0 / 3) │ - ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ 4 │ 2% (3 / 39) │ 100% (3 / 3) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ - - - -.. 
parsed-literal:: - - Output() - - - - - - - - - -.. parsed-literal:: - - ✅ vae_decoder compression finished Compressed vae_decoder can be found in FLUX.1-schnell/vae/vae_decoder_int4.xml - + Run OpenVINO model inference ---------------------------- @@ -506,7 +318,7 @@ model and inference device as arguments. .. parsed-literal:: - Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU') + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') @@ -521,8 +333,17 @@ model and inference device as arguments. ✅ transformer - Done! ✅ text_encoder - Done! ✅ text_encoder_2 - Done! - ✅ vae - Done! + +.. parsed-literal:: + + You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers + + +.. parsed-literal:: + + ✅ vae - Done! + .. code:: ipython3 @@ -565,20 +386,6 @@ Interactive demo # demo.launch(share=True) # it creates a publicly shareable link for the interface. Read more in the docs: https://gradio.app/docs/ try: - demo.launch(debug=False) + demo.launch(debug=True) except Exception: - demo.launch(debug=False, share=True) - - -.. parsed-literal:: - - Running on local URL: http://127.0.0.1:7860 - - To create a public link, set `share=True` in `launch()`. - - - - - - - + demo.launch(debug=True, share=True) diff --git a/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_20_1.jpg b/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_20_1.jpg index d4dcc2dcc6d8b2..7223074e225dd8 100644 --- a/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_20_1.jpg +++ b/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_20_1.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8160a0e39a306d7337c724b69f447f1b3aca95eaf93de866194e65678c0d01ba -size 14210 +oid sha256:8923d4a7ce04ed66bb58c28ab4450594a0340b97a15245fc594e669d33b574ef +size 14369 diff --git a/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_20_1.png b/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_20_1.png index b7d17c76ea39b4..83b60f80d75c13 100644 --- a/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_20_1.png +++ b/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_20_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:81d23cd276ecf3af1eda5b11e80510737046dd245e3d295965a23f596f4b99f5 -size 115703 +oid sha256:fab1fa5bf3f71d6bb6f505658da6765d37295b7b9618560d5f7c0c344e2a3896 +size 116250 diff --git a/docs/notebooks/freevc-voice-conversion-with-output.rst b/docs/notebooks/freevc-voice-conversion-with-output.rst index 73db954afdfbbb..a3c914a44b36a7 100644 --- a/docs/notebooks/freevc-voice-conversion-with-output.rst +++ b/docs/notebooks/freevc-voice-conversion-with-output.rst @@ -104,7 +104,7 @@ Check if FreeVC is installed and append its path to ``sys.path`` remote: Counting objects: 100% (74/74), done. remote: Compressing objects: 100% (47/47), done. remote: Total 131 (delta 43), reused 27 (delta 27), pack-reused 57 (from 1) - Receiving objects: 100% (131/131), 15.28 MiB | 17.35 MiB/s, done. + Receiving objects: 100% (131/131), 15.28 MiB | 3.81 MiB/s, done. Resolving deltas: 100% (43/43), done. @@ -134,8 +134,8 @@ Check if FreeVC is installed and append its path to ``sys.path`` Downloading... 
From: https://drive.google.com/uc?id=12-cB34qCTvByWT-QtOcZaqwwO21FLSqU&confirm=t&uuid=a703c43c-ccce-436c-8799-c11b88e9e7e4 - To: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/WavLM-Large.pt - 100%|██████████| 1.26G/1.26G [00:28<00:00, 44.0MB/s] + To: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/WavLM-Large.pt + 100%|██████████| 1.26G/1.26G [00:26<00:00, 48.4MB/s] .. code:: ipython3 @@ -239,13 +239,13 @@ Models initialization .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm. warnings.warn("torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.") .. parsed-literal:: - Loaded the voice encoder model on cpu in 0.00 seconds. + Loaded the voice encoder model on cpu in 0.01 seconds. Reading dataset settings @@ -288,7 +288,7 @@ Inference .. parsed-literal:: - 2it [00:03, 1.90s/it] + 2it [00:03, 1.72s/it] Result audio files should be available in ‘outputs/freevc’ @@ -360,13 +360,13 @@ Converting to OpenVINO’s IR format. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:495: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:495: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert embed_dim == self.embed_dim - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:496: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:496: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! assert list(query.size()) == [tgt_len, bsz, embed_dim] - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:500: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:500: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert key_bsz == bsz - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:502: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:502: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert src_len, bsz == value.shape[:2] @@ -581,12 +581,12 @@ function to OpenVINO IR format. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1102: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1102: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: Tensor-likes are not close! - Mismatched elements: 25912 / 25920 (100.0%) - Greatest absolute difference: 1.0370872616767883 at index (0, 0, 18175) (up to 1e-05 allowed) - Greatest relative difference: 8656.24884080371 at index (0, 0, 11526) (up to 1e-05 allowed) + Mismatched elements: 25909 / 25920 (100.0%) + Greatest absolute difference: 0.713585376739502 at index (0, 0, 4783) (up to 1e-05 allowed) + Greatest relative difference: 22806.258007743752 at index (0, 0, 19024) (up to 1e-05 allowed) _check_trace( @@ -645,7 +645,7 @@ And now we can check inference using only IR models. .. 
parsed-literal:: - 2it [00:01, 1.29it/s] + 2it [00:01, 1.30it/s] Result audio files should be available in ‘outputs/freevc’ and you can @@ -707,7 +707,7 @@ Result audio: diff --git a/docs/notebooks/grounded-segment-anything-with-output.rst b/docs/notebooks/grounded-segment-anything-with-output.rst index ee6741ff7af4e7..51522e791e04a4 100644 --- a/docs/notebooks/grounded-segment-anything-with-output.rst +++ b/docs/notebooks/grounded-segment-anything-with-output.rst @@ -124,16 +124,16 @@ segmentation you can select vanilla ``SAM``. Cloning into 'GroundingDINO'... remote: Enumerating objects: 379, done. remote: Counting objects: 100% (190/190), done. - remote: Compressing objects: 100% (80/80), done. - remote: Total 379 (delta 135), reused 110 (delta 110), pack-reused 189 (from 1) - Receiving objects: 100% (379/379), 14.03 MiB | 19.90 MiB/s, done. + remote: Compressing objects: 100% (81/81), done. + remote: Total 379 (delta 135), reused 109 (delta 109), pack-reused 189 (from 1) + Receiving objects: 100% (379/379), 14.03 MiB | 18.28 MiB/s, done. Resolving deltas: 100% (194/194), done. Cloning into 'EfficientSAM'... remote: Enumerating objects: 424, done. remote: Counting objects: 100% (85/85), done. remote: Compressing objects: 100% (33/33), done. remote: Total 424 (delta 76), reused 52 (delta 52), pack-reused 339 (from 1) - Receiving objects: 100% (424/424), 262.14 MiB | 25.51 MiB/s, done. + Receiving objects: 100% (424/424), 262.14 MiB | 30.07 MiB/s, done. Resolving deltas: 100% (246/246), done. @@ -222,6 +222,7 @@ GroundingDINO imports .. parsed-literal:: + FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers UserWarning: Failed to load custom C++ ops. Running on CPU mode Only! @@ -506,10 +507,10 @@ class, but the inference will be done using OpenVINO optimized model. .. parsed-literal:: - 2024-10-08 02:28:09.725059: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 02:28:09.764729: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-23 01:22:04.926963: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-23 01:22:04.966234: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
- 2024-10-08 02:28:10.354526: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-23 01:22:05.582957: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT Convert predicted boxes to supervision box detections format diff --git a/docs/notebooks/handwritten-ocr-with-output.rst b/docs/notebooks/handwritten-ocr-with-output.rst index 9f03d782ed2235..a66f73f07d99b4 100644 --- a/docs/notebooks/handwritten-ocr-with-output.rst +++ b/docs/notebooks/handwritten-ocr-with-output.rst @@ -49,21 +49,13 @@ Guide =2023.1.0" opencv-python tqdm - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + # Install openvino package + %pip install -q "openvino>=2023.1.0" opencv-python tqdm "matplotlib>=3.4" .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. Imports diff --git a/docs/notebooks/hello-detection-with-output.rst b/docs/notebooks/hello-detection-with-output.rst index 85dff2edc5cb31..d9293f8da61279 100644 --- a/docs/notebooks/hello-detection-with-output.rst +++ b/docs/notebooks/hello-detection-with-output.rst @@ -41,7 +41,7 @@ Guide =2023.1.0" opencv-python tqdm + %pip install -q "openvino>=2023.1.0" opencv-python tqdm "matplotlib>=3.4" .. parsed-literal:: diff --git a/docs/notebooks/hello-segmentation-with-output.rst b/docs/notebooks/hello-segmentation-with-output.rst index 6f55f7666710cc..e490cf48533277 100644 --- a/docs/notebooks/hello-segmentation-with-output.rst +++ b/docs/notebooks/hello-segmentation-with-output.rst @@ -35,21 +35,13 @@ Guide =2023.1.0" opencv-python tqdm - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "openvino>=2023.1.0" opencv-python tqdm "matplotlib>=3.4" .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. Imports @@ -196,7 +188,7 @@ is provided. .. parsed-literal:: - + @@ -223,7 +215,7 @@ Do Inference .. parsed-literal:: - + diff --git a/docs/notebooks/hello-world-with-output.rst b/docs/notebooks/hello-world-with-output.rst index 23ea9c15b85df0..5bd1216db29701 100644 --- a/docs/notebooks/hello-world-with-output.rst +++ b/docs/notebooks/hello-world-with-output.rst @@ -37,23 +37,13 @@ Guide =2023.1.0" opencv-python tqdm - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" - - + # Install required packages + %pip install -q "openvino>=2023.1.0" opencv-python tqdm "matplotlib>=3.4" .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. Imports diff --git a/docs/notebooks/hugging-face-hub-with-output.rst b/docs/notebooks/hugging-face-hub-with-output.rst index b13205541f558f..e17d63a031b445 100644 --- a/docs/notebooks/hugging-face-hub-with-output.rst +++ b/docs/notebooks/hugging-face-hub-with-output.rst @@ -196,7 +196,7 @@ Note how we reuse our real ``encoded_input``, passing it to the .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4779: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4779: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( @@ -333,10 +333,10 @@ documentation `__. To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) - 2024-10-08 02:29:42.074725: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 02:29:42.108881: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-23 01:23:29.312809: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-23 01:23:29.347879: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 02:29:42.698091: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-23 01:23:29.943155: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT Initialize and Convert the Model Automatically using OVModel class @@ -372,13 +372,9 @@ inference run. .. parsed-literal:: - Framework not specified. Using pt to export the model. Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight'] - This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). - This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). - Using framework PyTorch: 2.2.2+cpu - Overriding 1 configuration item(s) - - use_cache -> False .. parsed-literal:: @@ -386,11 +382,6 @@ inference run. WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. -.. 
parsed-literal:: - - Compiling the model to AUTO ... - - Convert model using Optimum CLI interface ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -443,7 +434,7 @@ Full list of supported arguments available via ``--help`` .. parsed-literal:: - 2024-10-08 02:29:55.851395: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-23 01:23:44.450300: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT usage: optimum-cli export openvino [-h] -m MODEL [--task TASK] [--framework {pt,tf}] [--trust-remote-code] [--weight-format {fp32,fp16,int8,int4,mxfp4}] @@ -474,18 +465,18 @@ Full list of supported arguments available via ``--help`` --task TASK The task to export the model for. If not specified, the task will be auto-inferred based on the model. Available tasks depend on the model, but are among: - ['image-classification', 'depth-estimation', 'object- - detection', 'zero-shot-object-detection', 'text-to- - audio', 'token-classification', 'audio-frame- - classification', 'semantic-segmentation', 'automatic- - speech-recognition', 'image-segmentation', 'question- - answering', 'text-classification', 'multiple-choice', - 'inpainting', 'masked-im', 'text2text-generation', - 'image-to-text', 'text-generation', 'sentence- - similarity', 'mask-generation', 'text-to-image', - 'audio-xvector', 'zero-shot-image-classification', - 'fill-mask', 'image-to-image', 'feature-extraction', - 'audio-classification']. For decoder models, use `xxx- + ['audio-xvector', 'text-to-audio', 'fill-mask', + 'feature-extraction', 'text-generation', 'zero-shot- + image-classification', 'text-to-image', 'text2text- + generation', 'zero-shot-object-detection', 'automatic- + speech-recognition', 'text-classification', 'semantic- + segmentation', 'masked-im', 'image-to-text', + 'sentence-similarity', 'object-detection', + 'inpainting', 'audio-frame-classification', 'image-to- + image', 'token-classification', 'question-answering', + 'audio-classification', 'image-segmentation', 'image- + classification', 'mask-generation', 'multiple-choice', + 'depth-estimation']. For decoder models, use `xxx- with-past` to export the model using past key values in the decoder. --framework {pt,tf} The framework to use for the export. If not provided, @@ -592,16 +583,10 @@ compression: .. parsed-literal:: - 2024-10-08 02:30:01.388483: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - Framework not specified. Using pt to export the model. + 2024-10-23 01:23:49.987001: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight'] - This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). - This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). - Using framework PyTorch: 2.2.2+cpu - Overriding 1 configuration item(s) - - use_cache -> False - OpenVINO Tokenizers is not available. 
To deploy models in production with C++ code, please follow installation instructions: https://github.com/openvinotoolkit/openvino_tokenizers?tab=readme-ov-file#installation - Tokenizer won't be converted. @@ -612,12 +597,6 @@ be loaded using the same OVModelForXXX class. model = OVModelForSequenceClassification.from_pretrained("models/optimum_model/fp16", device=device.value) - -.. parsed-literal:: - - Compiling the model to AUTO ... - - There are some models in the Hugging Face Models Hub, that are already converted and ready to run! You can filter those models out by library name, just type OpenVINO, or follow `this diff --git a/docs/notebooks/image-bind-with-output.rst b/docs/notebooks/image-bind-with-output.rst index 12321638ed19f8..1e8ecd63c1de0e 100644 --- a/docs/notebooks/image-bind-with-output.rst +++ b/docs/notebooks/image-bind-with-output.rst @@ -131,7 +131,7 @@ Prerequisites import platform - %pip install -q "torch>=2.0.1" "torchvision>=0.15.2,<0.17.0" "torchaudio>=2.0.2" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "torch>=2.0.1" "torchvision>=0.15.2,<0.17.0" "torchaudio>=2.0.2" "matplotlib>=3.4" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q datasets regex librosa soundfile pytorchvideo ftfy "timm>=0.6.7" einops fvcore "openvino>=2024.0.0" "nncf>=2.9.0" numpy scipy --extra-index-url https://download.pytorch.org/whl/cpu diff --git a/docs/notebooks/image-classification-quantization-with-output.rst b/docs/notebooks/image-classification-quantization-with-output.rst index f45c3ca5ec3b32..b5b33fdfc852e7 100644 --- a/docs/notebooks/image-classification-quantization-with-output.rst +++ b/docs/notebooks/image-classification-quantization-with-output.rst @@ -54,21 +54,13 @@ Guide =2023.1.0" "nncf>=2.6.0" torch torchvision tqdm --extra-index-url https://download.pytorch.org/whl/cpu - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "openvino>=2023.1.0" "nncf>=2.6.0" torch torchvision tqdm "matplotlib>=3.4" --extra-index-url https://download.pytorch.org/whl/cpu .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. .. code:: ipython3 @@ -112,7 +104,7 @@ Model preparation stage has the following steps: remote: Counting objects: 100% (281/281), done. remote: Compressing objects: 100% (96/96), done. remote: Total 282 (delta 135), reused 269 (delta 128), pack-reused 1 (from 1) - Receiving objects: 100% (282/282), 9.22 MiB | 18.95 MiB/s, done. + Receiving objects: 100% (282/282), 9.22 MiB | 24.58 MiB/s, done. Resolving deltas: 100% (135/135), done. @@ -184,7 +176,7 @@ Preprocessing for model obtained from training .. parsed-literal:: - 100%|██████████| 170498071/170498071 [00:06<00:00, 24572685.83it/s] + 100%|██████████| 170498071/170498071 [00:07<00:00, 23490143.19it/s] .. parsed-literal:: @@ -256,10 +248,10 @@ about supported parameters can be found on this .. parsed-literal:: - 2024-10-08 02:30:41.915322: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 02:30:41.946467: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. 
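The hunk above belongs to the image-classification quantization notebook, which feeds a calibration subset into ``nncf.quantize``. A rough, hedged sketch of that API is shown below; the IR path and the ``val_loader`` object are placeholders standing in for the notebook's actual artifacts, not names taken from it.

.. code:: ipython3

    import nncf
    import openvino as ov

    core = ov.Core()
    ov_model = core.read_model("model/classifier_fp32.xml")  # placeholder IR path


    def transform_fn(data_item):
        # val_loader is assumed to yield (images, labels) batches, e.g. a CIFAR-10 DataLoader
        images, _ = data_item
        return images.numpy()


    calibration_dataset = nncf.Dataset(val_loader, transform_fn)

    # Post-training quantization with default settings
    quantized_model = nncf.quantize(ov_model, calibration_dataset)
    ov.save_model(quantized_model, "model/classifier_int8.xml")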
+ 2024-10-23 01:24:29.479834: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-23 01:24:29.511386: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 02:30:42.497931: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-23 01:24:30.071901: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -427,7 +419,7 @@ Tool =2023.3.0" opencv-python transformers "diffusers>=0.24.0" accelerate gdown "scikit-image>=0.19.2" "gradio>=4.19" "nncf>=2.9.0" "datasets>=2.14.6" "peft>=0.6.2" + %pip install -q "openvino>=2023.3.0" opencv-python transformers "diffusers>=0.24.0" "matplotlib>=3.4" accelerate gdown "scikit-image>=0.19.2" "gradio>=4.19" "nncf>=2.9.0" "datasets>=2.14.6" "peft>=0.6.2" Convert and prepare Face IdentityNet ------------------------------------ diff --git a/docs/notebooks/instruct-pix2pix-image-editing-with-output.rst b/docs/notebooks/instruct-pix2pix-image-editing-with-output.rst index 6c27e0431eea09..72a9c88e8ea0c3 100644 --- a/docs/notebooks/instruct-pix2pix-image-editing-with-output.rst +++ b/docs/notebooks/instruct-pix2pix-image-editing-with-output.rst @@ -71,15 +71,8 @@ Install necessary packages .. code:: ipython3 - import platform - - %pip install -q "transformers>=4.25.1" torch accelerate "gradio>4.19" "datasets>=2.14.6" diffusers pillow opencv-python --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "transformers>=4.25.1" torch accelerate "gradio>4.19" "datasets>=2.14.6" "matplotlib>=3.4" diffusers pillow opencv-python --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q "openvino>=2023.1.0" - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" Create Pytorch Models pipeline ------------------------------ diff --git a/docs/notebooks/internvl2-with-output.rst b/docs/notebooks/internvl2-with-output.rst index 038e397209cb6c..602bfe84590494 100644 --- a/docs/notebooks/internvl2-with-output.rst +++ b/docs/notebooks/internvl2-with-output.rst @@ -275,6 +275,16 @@ documentation self.max_seq_len_cached: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/qwen2/modeling_qwen2.py:324: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/qwen2/modeling_qwen2.py:324: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/qwen2/modeling_qwen2.py:339: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/qwen2/modeling_qwen2.py:339: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim): @@ -521,7 +531,7 @@ Let’s check model capabilities in answering questions about image: .. parsed-literal:: - The image displays a young red panda cub. This cute creature has a striking mix of dark reds and whites on its fur, with a striking white underbelly and back. The cub is sitting and peering forward with a curious expression. It appears to be peering through a wooden partition, and a piece of bamboo is visible in the bottom-left corner of the image. The background includes green foliage, suggesting a natural habitat for the cub. This cub is cute and looks like it's enjoying + The image depicts a red panda, a large feline with a distinctive, reddish-brown coat and white face and chest. It is peeking over what appears to be a wooden platform or platform made for panda viewing in captivity. The background is filled with greenery, indicating that the photo was likely taken in a conservatory or wildlife park where penguins or seabirds are displayed. Interactive demo ---------------- diff --git a/docs/notebooks/jina-clip-with-output.rst b/docs/notebooks/jina-clip-with-output.rst index 5b61ee9af2d3e4..466e3be5d03fd1 100644 --- a/docs/notebooks/jina-clip-with-output.rst +++ b/docs/notebooks/jina-clip-with-output.rst @@ -77,7 +77,7 @@ Prerequisites .. code:: ipython3 %pip install -q "openvino>=2024.2.0" "datasets>=2.20" "nncf>=2.11.0" - %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "gradio>=4.19" "pillow" "einops" "timm" "transformers[torch]>=4.39" "torch>=2.1" + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "gradio>=4.19" "pillow" "einops" "timm" "transformers[torch]>=4.39" "torch>=2.1" "matplotlib>=3.4" .. parsed-literal:: @@ -104,50 +104,14 @@ weights, using ``from_pretrained`` method. model = AutoModel.from_pretrained("jinaai/jina-clip-v1", trust_remote_code=True) - -.. parsed-literal:: - - configuration_clip.py: 0%| | 0.00/11.7k [00:00=2024.0.0" "nncf>=2.11.0" "datasets>=2.20.0" - %pip install -q "transformers>=4.35" Pillow "gradio>=4.19" opencv-python + %pip install -q "transformers>=4.35" Pillow "gradio>=4.19" opencv-python "matplotlib>=3.4" %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu torch torchvision .. 
parsed-literal:: - Requirement already satisfied: pip in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (24.2) + Requirement already satisfied: pip in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (24.2) Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. @@ -152,10 +152,10 @@ example `__ .. parsed-literal:: - 2024-10-08 02:37:47.151795: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 02:37:47.185561: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-23 01:31:37.373532: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-23 01:31:37.408388: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 02:37:47.732267: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-23 01:31:37.931462: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -374,11 +374,11 @@ Vision model accept ``pixel_values`` and returns ``image_embeds``. .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4713: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4713: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:465: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
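The TracerWarning lines in this hunk are emitted while PyTorch submodels are traced through ``ov.convert_model``. As a generic illustration of that conversion step (the ``vision_model`` module and the input shape are assumptions for the sketch, not the notebook's exact objects):

.. code:: ipython3

    import torch
    import openvino as ov

    # vision_model is assumed to be an existing torch.nn.Module that takes pixel_values;
    # the example input shape below is illustrative only.
    example_input = torch.zeros((1, 3, 224, 224))

    with torch.no_grad():
        ov_vision_model = ov.convert_model(vision_model, example_input=example_input)

    # Saving as FP16 IR is the default for ov.save_model
    ov.save_model(ov_vision_model, "image_encoder.xml")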
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:465: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:505: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:505: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): @@ -406,7 +406,7 @@ Convert Image To Text Projection model .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:165: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:165: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) if a.grad is not None: @@ -541,13 +541,13 @@ generated text by ``AutoProcessor``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:804: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:804: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if max_pos > self.weights.size(0): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:1113: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:1113: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if input_shape[-1] > 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:920: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:920: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attention_mask.size() != (batch_size, 1, seq_length, src_len): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:1206: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:1206: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: @@ -1389,9 +1389,9 @@ pipelines, we use mean inference time on 7 samples. .. 
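The FP32-versus-optimized timings reported immediately below are mean wall-clock inference times over a handful of samples. A minimal way to collect such numbers, assuming the two pipeline callables and a list of validation samples already exist in the notebook session:

.. code:: ipython3

    import time


    def mean_inference_time(pipeline_fn, samples):
        # Average wall-clock latency of pipeline_fn over the given samples
        timings = []
        for sample in samples:
            start = time.perf_counter()
            pipeline_fn(sample)
            timings.append(time.perf_counter() - start)
        return sum(timings) / len(timings)


    # Hypothetical usage; fp32_pipeline, optimized_pipeline and validation_samples
    # are assumed to be defined earlier in the notebook.
    # fp32_time = mean_inference_time(fp32_pipeline, validation_samples[:7])
    # int8_time = mean_inference_time(optimized_pipeline, validation_samples[:7])
    # print(f"Performance speed-up: {fp32_time / int8_time:.3f}")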
parsed-literal:: - FP32 pipeline: 2.691 seconds - Optimized pipeline: 1.193 seconds - Performance speed-up: 2.257 + FP32 pipeline: 2.696 seconds + Optimized pipeline: 1.137 seconds + Performance speed-up: 2.372 Interactive inference diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.jpg b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.jpg index 16b3efe503aea0..9673054cc1b8e0 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.jpg +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:af0a33aba1728df2580f26f7ecf01774c2bc8084835df321d825197a945d775f -size 118088 +oid sha256:c45f05bcfa118a3d7e9024cf83963a4bb5504cb97f669c0c8261c469bab6dc75 +size 121985 diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.png b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.png index c47f776f0af026..a5be9e2155f6cc 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.png +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ebd6f6c9ee8fd3adb0dfbaebe7b41e7bdab039063a56ce21f0c6e01429d5ce6b -size 1151007 +oid sha256:2fc77a26e4c6d30ca9d21fe47ddc25624662d566f6c98ac1c160637fbcb2363b +size 1151151 diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_48_1.png b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_48_1.png index 90a6e30cff3f2e..0da4a480d88cc3 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_48_1.png +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_48_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8a3b5a0b0c94dec94a9aeb46ebb0ca6af8e63897684e446a299dc31b84851ce2 -size 1148928 +oid sha256:9cfd18faba3aef57349c70b157fabd4dc2827a8bc6074252e717a67a9f9d488a +size 1148752 diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.jpg b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.jpg index 073bf8e86591fb..e992bdf66a9da7 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.jpg +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0f41efc8140938f17d9ea5b31d699440fdcc3f3d407eba04abc4d694769f2529 -size 122071 +oid 
sha256:f70d76f01ee01d54713ed161de8d09713816b4f3fd7df99a5ff96c4c5e0b1bf0 +size 124539 diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.png b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.png index 5069e51978df61..ec9e6a6277d4f0 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.png +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fa756de5e754a64022b09b445f6851d8ce7373e09d5a776497bbf69513085cc8 -size 1150840 +oid sha256:f4dc2d892ef69df259eff4b46527d5e4382457b9b381a1e3b35b22d1d4312da4 +size 1150974 diff --git a/docs/notebooks/language-quantize-bert-with-output.rst b/docs/notebooks/language-quantize-bert-with-output.rst index f698389c6f8304..64f49c2b314811 100644 --- a/docs/notebooks/language-quantize-bert-with-output.rst +++ b/docs/notebooks/language-quantize-bert-with-output.rst @@ -101,10 +101,10 @@ Imports .. parsed-literal:: - 2024-10-08 02:44:29.489974: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 02:44:29.524217: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-23 01:38:12.900514: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-23 01:38:12.934654: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 02:44:30.075180: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-23 01:38:13.484434: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -211,7 +211,7 @@ PyTorch model formats are supported: .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4713: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4713: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( @@ -247,7 +247,7 @@ tokenizer from HuggingFace. .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884 warnings.warn( @@ -505,9 +505,9 @@ Frames Per Second (FPS) for images. .. parsed-literal:: - PyTorch model on CPU: 0.068 seconds per sentence, SPS: 14.78 - IR FP32 model in OpenVINO Runtime/AUTO: 0.020 seconds per sentence, SPS: 49.30 - OpenVINO IR INT8 model in OpenVINO Runtime/AUTO: 0.009 seconds per sentence, SPS: 107.63 + PyTorch model on CPU: 0.068 seconds per sentence, SPS: 14.67 + IR FP32 model in OpenVINO Runtime/AUTO: 0.020 seconds per sentence, SPS: 49.46 + OpenVINO IR INT8 model in OpenVINO Runtime/AUTO: 0.009 seconds per sentence, SPS: 108.19 Finally, measure the inference performance of OpenVINO ``FP32`` and @@ -548,27 +548,27 @@ in OpenVINO. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.LATENCY. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 18.82 ms + [ INFO ] Read model took 18.86 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,?] - [ INFO ] attention_mask , 63 (node: attention_mask) : i64 / [...] / [1,?] + [ INFO ] 63 , attention_mask (node: attention_mask) : i64 / [...] / [1,?] [ INFO ] token_type_ids (node: token_type_ids) : i64 / [...] / [1,?] [ INFO ] Model outputs: [ INFO ] logits (node: __module.classifier/aten::linear/Add) : f32 / [...] / [1,2] [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'input_ids': [1,128], '63': [1,128], 'token_type_ids': [1,128] - [ INFO ] Reshape model took 5.48 ms + [ INFO ] Reshape model took 5.46 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,128] - [ INFO ] attention_mask , 63 (node: attention_mask) : i64 / [...] / [1,128] + [ INFO ] 63 , attention_mask (node: attention_mask) : i64 / [...] / [1,128] [ INFO ] token_type_ids (node: token_type_ids) : i64 / [...] / [1,128] [ INFO ] Model outputs: [ INFO ] logits (node: __module.classifier/aten::linear/Add) : f32 / [...] / [1,2] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 351.96 ms + [ INFO ] Compile model took 373.39 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -609,17 +609,17 @@ in OpenVINO. 
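The benchmark log around this point is standard ``benchmark_app`` output. The exact command is not part of this hunk; an invocation roughly like the one below (the model path and input names are placeholders) produces a comparable synchronous-latency report on the AUTO device:

.. code:: ipython3

    # ~120 s of synchronous inference on AUTO, with all text inputs reshaped to [1,128];
    # the IR path and input tensor names are placeholders, not the notebook's exact values.
    !benchmark_app -m model/bert_fp32.xml -d AUTO -api sync -t 120 \
        -shape "input_ids[1,128],attention_mask[1,128],token_type_ids[1,128]"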
[ INFO ] Fill input 'token_type_ids' with random values [Step 10/11] Measuring performance (Start inference synchronously, limits: 120000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 20.92 ms + [ INFO ] First inference took 24.05 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 6334 iterations - [ INFO ] Duration: 120017.42 ms + [ INFO ] Count: 5730 iterations + [ INFO ] Duration: 120000.60 ms [ INFO ] Latency: - [ INFO ] Median: 18.69 ms - [ INFO ] Average: 18.85 ms - [ INFO ] Min: 17.15 ms - [ INFO ] Max: 26.62 ms - [ INFO ] Throughput: 52.78 FPS + [ INFO ] Median: 20.64 ms + [ INFO ] Average: 20.84 ms + [ INFO ] Min: 19.84 ms + [ INFO ] Max: 31.70 ms + [ INFO ] Throughput: 47.75 FPS .. code:: ipython3 @@ -646,7 +646,7 @@ in OpenVINO. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.LATENCY. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 25.69 ms + [ INFO ] Read model took 25.60 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,?] @@ -657,7 +657,7 @@ in OpenVINO. [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'input_ids': [1,128], '63': [1,128], 'token_type_ids': [1,128] - [ INFO ] Reshape model took 7.39 ms + [ INFO ] Reshape model took 7.46 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,128] @@ -666,7 +666,7 @@ in OpenVINO. [ INFO ] Model outputs: [ INFO ] logits (node: __module.classifier/aten::linear/Add) : f32 / [...] / [1,2] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 1128.78 ms + [ INFO ] Compile model took 1067.96 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -707,15 +707,15 @@ in OpenVINO. [ INFO ] Fill input 'token_type_ids' with random values [Step 10/11] Measuring performance (Start inference synchronously, limits: 120000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 15.68 ms + [ INFO ] First inference took 17.45 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 12868 iterations - [ INFO ] Duration: 120000.02 ms + [ INFO ] Count: 13316 iterations + [ INFO ] Duration: 120006.19 ms [ INFO ] Latency: - [ INFO ] Median: 9.01 ms - [ INFO ] Average: 9.23 ms - [ INFO ] Min: 8.43 ms - [ INFO ] Max: 12.91 ms - [ INFO ] Throughput: 107.23 FPS + [ INFO ] Median: 8.92 ms + [ INFO ] Average: 8.92 ms + [ INFO ] Min: 7.66 ms + [ INFO ] Max: 14.16 ms + [ INFO ] Throughput: 110.96 FPS diff --git a/docs/notebooks/latent-consistency-models-optimum-demo-with-output.rst b/docs/notebooks/latent-consistency-models-optimum-demo-with-output.rst index 348a243480aec1..3707d3f30d5f09 100644 --- a/docs/notebooks/latent-consistency-models-optimum-demo-with-output.rst +++ b/docs/notebooks/latent-consistency-models-optimum-demo-with-output.rst @@ -134,10 +134,10 @@ https://huggingface.co/docs/diffusers/en/api/pipelines/latent_consistency_models .. parsed-literal:: - 2024-10-08 02:50:26.200628: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. 
You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 02:50:26.234856: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-23 01:44:02.155955: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-23 01:44:02.191160: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 02:50:26.890470: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-23 01:44:02.863862: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -231,7 +231,6 @@ and there is no need to do it manually .. parsed-literal:: - Framework not specified. Using pt to export the model. Keyword arguments {'subfolder': '', 'trust_remote_code': False} are not expected by LatentConsistencyModelPipeline and will be ignored. @@ -241,11 +240,6 @@ and there is no need to do it manually Loading pipeline components...: 0%| | 0/7 [00:00 by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 . @@ -265,43 +257,40 @@ and there is no need to do it manually OVLatentConsistencyModelPipeline { "_class_name": "OVLatentConsistencyModelPipeline", - "_diffusers_version": "0.30.3", + "_diffusers_version": "0.31.0", + "_name_or_path": "/tmp/tmpz3tx8cvr", "feature_extractor": [ "transformers", "CLIPImageProcessor" ], + "image_encoder": [ + null, + null + ], "requires_safety_checker": true, "safety_checker": [ - "stable_diffusion", - "StableDiffusionSafetyChecker" + null, + null ], "scheduler": [ "diffusers", "LCMScheduler" ], "text_encoder": [ - "optimum", + "optimum.intel.openvino.modeling_diffusion", "OVModelTextEncoder" ], - "text_encoder_2": [ - null, - null - ], "tokenizer": [ "transformers", "CLIPTokenizer" ], "unet": [ - "optimum", + "optimum.intel.openvino.modeling_diffusion", "OVModelUnet" ], - "vae_decoder": [ - "optimum", - "OVModelVaeDecoder" - ], - "vae_encoder": [ - "optimum", - "OVModelVaeEncoder" + "vae": [ + "optimum.intel.openvino.modeling_diffusion", + "OVModelVae" ] } @@ -312,15 +301,6 @@ and there is no need to do it manually ov_pipeline.to(device.value) ov_pipeline.compile() - -.. parsed-literal:: - - Compiling the vae_decoder to AUTO ... - Compiling the unet to AUTO ... - Compiling the vae_encoder to AUTO ... - Compiling the text_encoder to AUTO ... - - .. 
code:: ipython3 prompt = "A cute squirrel in the forest, portrait, 8k" diff --git a/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_15_1.jpg b/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_15_1.jpg index c3ee252ebc2731..a19eb369148e10 100644 --- a/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_15_1.jpg +++ b/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_15_1.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5596d96021ad02d2a954c32cfe4679598a3fc331bbb92df84e437b2594a5d525 -size 30142 +oid sha256:8ccbcf6c99b8d75b1c683375852111ce17983fbea50bd0b0bb294d06bbb317fb +size 38768 diff --git a/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_15_1.png b/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_15_1.png index 484f8da2cd5c03..d27692124393df 100644 --- a/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_15_1.png +++ b/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_15_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1d2a51b952179d6520da3d61f79e1ee7d57d7c22504cf712579110c07b68e63f -size 421582 +oid sha256:b936a832e23c30affb1988828f17a16e988b42413ff7dfe4381fb723e068ed97 +size 456597 diff --git a/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_8_1.jpg b/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_8_1.jpg index bce1a840efae34..e320b91964889a 100644 --- a/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_8_1.jpg +++ b/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_8_1.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e624f572f30ee308bed067571eeb9cb035eb095eabebc0914ac8ae67605896c3 -size 33139 +oid sha256:0ab792ded8330c36072ec13fcfb0e0896ce3014413145167fd7a7c5b570919a4 +size 37604 diff --git a/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_8_1.png b/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_8_1.png index 9803cd18053390..e7a5d16aa88eb2 100644 --- a/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_8_1.png +++ b/docs/notebooks/latent-consistency-models-optimum-demo-with-output_files/latent-consistency-models-optimum-demo-with-output_8_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0bd853d844aabefb69a678715b479dd74c84bd003eeb0362eb568cc592b8fc69 -size 428147 +oid sha256:f440a9e7cb3d6a9f6609165516be7e5025e26752004ca56baf570f49840af03c +size 470802 diff --git a/docs/notebooks/lcm-lora-controlnet-with-output.rst b/docs/notebooks/lcm-lora-controlnet-with-output.rst index 
c34511bc579b08..c6383078a89b71 100644 --- a/docs/notebooks/lcm-lora-controlnet-with-output.rst +++ b/docs/notebooks/lcm-lora-controlnet-with-output.rst @@ -236,7 +236,7 @@ Install required packages .. code:: ipython3 %pip install -q "torch" transformers "diffusers>=0.24.0" "controlnet-aux>=0.0.6" "peft>=0.6.2" accelerate --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "openvino>=2023.2.0" pillow "gradio>=4.19" "datasets>=2.14.6" "nncf>=2.7.0" + %pip install -q "openvino>=2023.2.0" pillow "gradio>=4.19" "datasets>=2.14.6" "nncf>=2.7.0" "matplotlib>=3.4" Prepare PyTorch models diff --git a/docs/notebooks/llava-multimodal-chatbot-genai-with-output.rst b/docs/notebooks/llava-multimodal-chatbot-genai-with-output.rst new file mode 100644 index 00000000000000..435e7eaf62c53b --- /dev/null +++ b/docs/notebooks/llava-multimodal-chatbot-genai-with-output.rst @@ -0,0 +1,509 @@ +Visual-language assistant with LLaVA and OpenVINO Generative API +================================================================ + +`LLaVA `__ (Large Language and Vision +Assistant) is large multimodal model that aims to develop a +general-purpose visual assistant that can follow both language and image +instructions to complete various real-world tasks. The idea is to +combine the power of large language models (LLMs) with vision encoders +like CLIP to create an end-to-end trained neural assistant that +understands and acts upon multimodal instructions. + +In the field of artificial intelligence, the goal is to create a +versatile assistant capable of understanding and executing tasks based +on both visual and language inputs. Current approaches often rely on +large vision models that solve tasks independently, with language only +used to describe image content. While effective, these models have fixed +interfaces with limited interactivity and adaptability to user +instructions. On the other hand, large language models (LLMs) have shown +promise as a universal interface for general-purpose assistants. By +explicitly representing various task instructions in language, these +models can be guided to switch and solve different tasks. To extend this +capability to the multimodal domain, the `LLaVA +paper `__ introduces \`visual +instruction-tuning, a novel approach to building a general-purpose +visual assistant. + +In this tutorial we consider how to use LLaVA model to build multimodal +chatbot using `OpenVINO +GenAI `__. For +demonstration purposes we will use +`LLaVA-1.5-7B `__ model for conversion, +similar steps required to run other models from `LLaVA Model +Zoo `__. 
+ +- Install prerequisites +- Convert model to OpenVINO Intermediate Representation format using + Optimum Intel +- Compress model weights to 4 and 8 bits using NNCF +- Prepare OpenVINO GenAI inference pipeline +- Run OpenVINO model + + +**Table of contents:** + + +- `About model <#about-model>`__ +- `Prerequisites <#prerequisites>`__ +- `Convert and Optimize Model <#convert-and-optimize-model>`__ + + - `Convert model to OpenVINO IR format using Optimum + CLI <#convert-model-to-openvino-ir-format-using-optimum-cli>`__ + - `Compress Model weights to 4 and 8 bits using + NNCF <#compress-model-weights-to-4-and-8-bits-using-nncf>`__ + +- `Prepare OpenVINO GenAI inference + pipeline <#prepare-openvino-genai-inference-pipeline>`__ + + - `Select inference device <#select-inference-device>`__ + - `Select model variant <#select-model-variant>`__ + - `Load OpenVINO model <#load-openvino-model>`__ + +- `Run model inference <#run-model-inference>`__ + + - `Prepare input data <#prepare-input-data>`__ + - `Test model inference <#test-model-inference>`__ + +- `Interactive demo <#interactive-demo>`__ + +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a self-contained example that relies solely on its own code. + +We recommend running the notebook in a virtual environment. You only +need a Jupyter server to start. For details, please refer to +`Installation +Guide `__. + +About model +----------- + + + +LLaVA connects pre-trained `CLIP +ViT-L/14 `__ visual encoder and large +language model like Vicuna, LLaMa v2 or MPT, using a simple projection +matrix + +.. figure:: https://llava-vl.github.io/images/llava_arch.png + :alt: vlp_matrix.png + + vlp_matrix.png + +Model training procedure consists of 2 stages: + +- Stage 1: Pre-training for Feature Alignment. Only the projection + matrix is updated, based on a subset of CC3M. +- Stage 2: Fine-tuning End-to-End.. Both the projection matrix and LLM + are updated for two different use scenarios: + + - Visual Chat: LLaVA is fine-tuned on our generated multimodal + instruction-following data for daily user-oriented applications. + - Science QA: LLaVA is fine-tuned on this multimodal reasoning + dataset for the science domain. + +More details about model can be found in original `project +web-page `__, +`paper `__ and +`repo `__. + +Prerequisites +------------- + + + +Install required dependencies + +.. code:: ipython3 + + %pip install -q "torch>=2.1.0" "torchvision" "torchaudio" --index-url https://download.pytorch.org/whl/cpu + %pip install -q "git+https://github.com/eaidova/optimum-intel.git@ea/minicpmv" + %pip install -q "nncf>=2.13.0" "sentencepiece" "tokenizers>=0.12.1" "transformers>=4.45.0" "gradio>=4.36" + %pip install -q -U --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly openvino_tokenizers openvino openvino-genai + +Convert and Optimize Model +-------------------------- + + + +Our model conversion and optimization consist of following steps: 1. +Download original PyTorch model. 2. Convert model to OpenVINO format. 3. +Compress model weights using NNCF. + +Let’s consider each step more deeply. + +Convert model to OpenVINO IR format using Optimum CLI +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +OpenVINO supports PyTorch models via conversion to OpenVINO Intermediate +Representation format. For convenience, we will use OpenVINO integration +with HuggingFace Optimum. 
`Optimum +Intel `__ is the +interface between the Transformers and Diffusers libraries and the +different tools and libraries provided by Intel to accelerate end-to-end +pipelines on Intel architectures. + +Among other use cases, Optimum Intel provides a simple interface to +optimize your Transformers and Diffusers models, convert them to the +OpenVINO Intermediate Representation (IR) format and run inference using +OpenVINO Runtime. ``optimum-cli`` provides command line interface for +model conversion and optimization. + +General command format: + +.. code:: bash + + optimum-cli export openvino --model --task + +where task is task to export the model for, if not specified, the task +will be auto-inferred based on the model. You can find a mapping between +tasks and model classes in Optimum TaskManager +`documentation `__. +Additionally, you can specify weights compression using +``--weight-format`` argument with one of following options: ``fp32``, +``fp16``, ``int8`` and ``int4``. Fro int8 and int4 +`nncf `__ will be used for +weight compression. More details about model export provided in `Optimum +Intel +documentation `__. + +.. code:: ipython3 + + from pathlib import Path + + model_id = "llava-hf/llava-1.5-7b-hf" + model_path = Path(model_id.split("/")[-1]) / "FP16" + + if not model_path.exists(): + !optimum-cli export openvino --model {model_id} --weight-format fp16 {model_path} + +Compress Model weights to 4 and 8 bits using NNCF +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +For reducing memory consumption, weights compression optimization can be +applied using `NNCF `__. Weight +compression aims to reduce the memory footprint of a model. It can also +lead to significant performance improvement for large memory-bound +models, such as Large Language Models (LLMs). LLMs and other models, +which require extensive memory to store the weights during inference, +can benefit from weight compression in the following ways: + +- enabling the inference of exceptionally large models that cannot be + accommodated in the memory of the device; + +- improving the inference performance of the models by reducing the + latency of the memory access when computing the operations with + weights, for example, Linear layers. + +`Neural Network Compression Framework +(NNCF) `__ provides 4-bit / +8-bit mixed weight quantization as a compression method primarily +designed to optimize LLMs. The main difference between weights +compression and full model quantization (post-training quantization) is +that activations remain floating-point in the case of weights +compression which leads to a better accuracy. Weight compression for +LLMs provides a solid inference performance improvement which is on par +with the performance of the full model quantization. In addition, weight +compression is data-free and does not require a calibration dataset, +making it easy to use. + +``nncf.compress_weights`` function can be used for performing weights +compression. The function accepts an OpenVINO model and other +compression parameters. Compared to INT8 compression, INT4 compression +improves performance even more, but introduces a minor drop in +prediction quality. + +More details about weights compression, can be found in `OpenVINO +documentation `__. + +.. code:: ipython3 + + import ipywidgets as widgets + + compression_mode = widgets.Dropdown( + options=["INT4", "INT8"], + value="INT4", + description="Compression mode:", + disabled=False, + ) + + compression_mode + + + + +.. 
parsed-literal:: + + Dropdown(description='Compression mode:', options=('INT4', 'INT8'), value='INT4') + + + +.. code:: ipython3 + + import shutil + import nncf + import openvino as ov + import gc + + core = ov.Core() + + + def compress_model_weights(precision): + int4_compression_config = { + "mode": nncf.CompressWeightsMode.INT4_ASYM, + "group_size": 128, + "ratio": 1, + } + int8_compression_config = {"mode": nncf.CompressWeightsMode.INT8_ASYM} + + compressed_model_path = model_path.parent / precision + + if not compressed_model_path.exists(): + ov_model = core.read_model(model_path / "openvino_language_model.xml") + compression_config = int4_compression_config if precision == "INT4" else int8_compression_config + compressed_ov_model = nncf.compress_weights(ov_model, **compression_config) + ov.save_model(compressed_ov_model, compressed_model_path / "openvino_language_model.xml") + del compressed_ov_model + del ov_model + gc.collect() + for file_name in model_path.glob("*"): + if file_name.name in ["openvino_language_model.xml", "openvino_language_model.bin"]: + continue + shutil.copy(file_name, compressed_model_path) + + + compress_model_weights(compression_mode.value) + + +.. parsed-literal:: + + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino + + +Prepare OpenVINO GenAI inference pipeline +----------------------------------------- + + + +`OpenVINO™ GenAI `__ +is a library of the most popular Generative AI model pipelines, +optimized execution methods, and samples that run on top of highly +performant `OpenVINO +Runtime `__. + +This library is friendly to PC and laptop execution, and optimized for +resource consumption. It requires no external dependencies to run +generative models as it already includes all the core functionality +(e.g. tokenization via openvino-tokenizers). OpenVINO™ GenAI is a flavor +of OpenVINO™, aiming to simplify running inference of generative AI +models. It hides the complexity of the generation process and minimizes +the amount of code required. + +Inference Visual language models can be implemented using OpenVINO GenAI +``VLMPipeline`` class. Similarly to LLMPipeline, that we discussed in +this +`notebook `__. +It supports chat mode with preserving conversational history inside +pipeline, that allows us effectively implements chatbot that supports +conversation about provided images content. + +.. code:: ipython3 + + from openvino_genai import VLMPipeline, GenerationConfig + +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~ + + + +Select device from dropdown list for running inference using OpenVINO. + +.. code:: ipython3 + + import requests + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) + + from notebook_utils import device_widget + + device = device_widget(exclude=["NPU"]) + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + + + +Select model variant +~~~~~~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + model_base_path = model_path.parent + available_models = [] + + for precision in ["INT4", "INT8", "FP16"]: + if (model_base_path / precision).exists(): + available_models.append(precision) + + model_variant = widgets.Dropdown( + options=available_models, + value=available_models[0], + description="Compression mode:", + disabled=False, + ) + + model_variant + + + + +.. 
parsed-literal:: + + Dropdown(description='Compression mode:', options=('INT4', 'FP16'), value='INT4') + + + +Load OpenVINO model +~~~~~~~~~~~~~~~~~~~ + + + +For pipeline initialization we should provide path to model directory +and inference device. + +.. code:: ipython3 + + ov_model = VLMPipeline(str(model_base_path / model_variant.value), device=device.value) + +Run model inference +------------------- + + + +Now, when we have model and defined generation pipeline, we can run +model inference. + +Prepare input data +~~~~~~~~~~~~~~~~~~ + + + +For preparing input data, ``VLMPipeline`` use tokenizer and image +processor inside, we just need to convert image to input OpenVINO tensor +and provide question as string. Additionally, we can provides options +for controlling generation process (e.g. number of maximum generated +tokens or using multinomial sampling for decoding instead of greedy +search approach) using ``GenerationConfig``. + +Generation process for long response may be time consuming, for +accessing partial result as soon as it is generated without waiting when +whole process finished, Streaming API can be used. Token streaming is +the mode in which the generative system returns the tokens one by one as +the model generates them. This enables showing progressive generations +to the user rather than waiting for the whole generation. Streaming is +an essential aspect of the end-user experience as it reduces latency, +one of the most critical aspects of a smooth experience. + +.. code:: ipython3 + + import requests + from PIL import Image + from io import BytesIO + import numpy as np + + config = GenerationConfig() + config.max_new_tokens = 100 + + + def load_image(image_file): + if image_file.startswith("http") or image_file.startswith("https"): + response = requests.get(image_file) + image = Image.open(BytesIO(response.content)).convert("RGB") + else: + image = Image.open(image_file).convert("RGB") + image_data = np.array(image.getdata()).reshape(1, 3, image.size[1], image.size[0]).astype(np.byte) + return image, ov.Tensor(image_data) + + + def streamer(subword: str) -> bool: + """ + + Args: + subword: sub-word of the generated text. + + Returns: Return flag corresponds whether generation should be stopped. + + """ + print(subword, end="", flush=True) + + + image_file = "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11" + + image, image_tensor = load_image(image_file) + text_message = "What is unusual on this image?" + + prompt = text_message + +Test model inference +~~~~~~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + display(image) + print(f"Question:\n{text_message}") + print("Answer:") + output = ov_model.generate(prompt, image=image_tensor, generation_config=config, streamer=streamer) + + + +.. image:: llava-multimodal-chatbot-genai-with-output_files/llava-multimodal-chatbot-genai-with-output_21_0.png + + +.. parsed-literal:: + + Question: + What is unusual on this image? + Answer: + + The unusual aspect of this image is that a cat is lying inside a cardboard box. Cats are known for their curiosity and love for small, enclosed spaces. However, it is not a common sight to see a cat comfortably resting inside a cardboard box. + +Interactive demo +---------------- + + + +.. 
code:: ipython3 + + if not Path("gradio_helper.py").exists(): + r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/llava-multimodal-chatbot/gradio_helper.py") + open("gradio_helper.py", "w").write(r.text) + + from gradio_helper import make_demo_llava + + demo = make_demo_llava(ov_model) + + try: + demo.launch(debug=False) + except Exception: + demo.launch(share=True, debug=False) + # if you are launching remotely, specify server_name and server_port + # demo.launch(server_name='your server name', server_port='server port in int') + # Read more in the docs: https://gradio.app/docs/ diff --git a/docs/notebooks/llava-multimodal-chatbot-genai-with-output_files/llava-multimodal-chatbot-genai-with-output_21_0.jpg b/docs/notebooks/llava-multimodal-chatbot-genai-with-output_files/llava-multimodal-chatbot-genai-with-output_21_0.jpg new file mode 100644 index 00000000000000..c6aeec77cd3cb2 --- /dev/null +++ b/docs/notebooks/llava-multimodal-chatbot-genai-with-output_files/llava-multimodal-chatbot-genai-with-output_21_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fc0d22d75f23474fb4f8aec8c0bf0fdf5d9377f3379e82a3887003e6da47e7e +size 60425 diff --git a/docs/notebooks/llava-multimodal-chatbot-genai-with-output_files/llava-multimodal-chatbot-genai-with-output_21_0.png b/docs/notebooks/llava-multimodal-chatbot-genai-with-output_files/llava-multimodal-chatbot-genai-with-output_21_0.png new file mode 100644 index 00000000000000..c6673a757ab5dc --- /dev/null +++ b/docs/notebooks/llava-multimodal-chatbot-genai-with-output_files/llava-multimodal-chatbot-genai-with-output_21_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c715d8adee4bf7519690de20b57ef2edaa2f914c86a64d107f99a919dcdad218 +size 854224 diff --git a/docs/notebooks/llava-multimodal-chatbot-optimum-with-output.rst b/docs/notebooks/llava-multimodal-chatbot-optimum-with-output.rst new file mode 100644 index 00000000000000..3c88a8e94388d4 --- /dev/null +++ b/docs/notebooks/llava-multimodal-chatbot-optimum-with-output.rst @@ -0,0 +1,508 @@ +Visual-language assistant with LLaVA and Optimum Intel OpenVINO integration +=========================================================================== + +`LLaVA `__ (Large Language and Vision +Assistant) is large multimodal model that aims to develop a +general-purpose visual assistant that can follow both language and image +instructions to complete various real-world tasks. The idea is to +combine the power of large language models (LLMs) with vision encoders +like CLIP to create an end-to-end trained neural assistant that +understands and acts upon multimodal instructions. + +In the field of artificial intelligence, the goal is to create a +versatile assistant capable of understanding and executing tasks based +on both visual and language inputs. Current approaches often rely on +large vision models that solve tasks independently, with language only +used to describe image content. While effective, these models have fixed +interfaces with limited interactivity and adaptability to user +instructions. On the other hand, large language models (LLMs) have shown +promise as a universal interface for general-purpose assistants. By +explicitly representing various task instructions in language, these +models can be guided to switch and solve different tasks. 
To extend this +capability to the multimodal domain, the `LLaVA +paper `__ introduces \`visual +instruction-tuning, a novel approach to building a general-purpose +visual assistant. + +In this tutorial we consider how to use LLaVA model to build multimodal +chatbot using `Optimum +Intel `__. For +demonstration purposes we will use +`LLaVA-1.5-7B `__ model for conversion, +similar steps required to run other models from `LLaVA Model +Zoo `__. + +The tutorial consists from following steps: + +- Install prerequisites +- Convert model to OpenVINO Intermediate Representation format using + Optimum Intel +- Compress model weights to 4 and 8 bits using NNCF +- Prepare OpenVINO-based inference pipeline +- Run OpenVINO model + + +**Table of contents:** + + +- `About model <#about-model>`__ +- `Prerequisites <#prerequisites>`__ +- `Convert and Optimize Model <#convert-and-optimize-model>`__ + + - `Convert model to OpenVINO IR format using Optimum + CLI <#convert-model-to-openvino-ir-format-using-optimum-cli>`__ + - `Compress Model weights to 4 and 8 bits using + NNCF <#compress-model-weights-to-4-and-8-bits-using-nncf>`__ + +- `Prepare OpenVINO based inference + pipeline <#prepare-openvino-based-inference-pipeline>`__ +- `Run model inference <#run-model-inference>`__ + + - `Select inference device <#select-inference-device>`__ + - `Select model variant <#select-model-variant>`__ + - `Load OpenVINO model <#load-openvino-model>`__ + - `Prepare input data <#prepare-input-data>`__ + - `Test model inference <#test-model-inference>`__ + +- `Interactive demo <#interactive-demo>`__ + +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a self-contained example that relies solely on its own code. + +We recommend running the notebook in a virtual environment. You only +need a Jupyter server to start. For details, please refer to +`Installation +Guide `__. + +About model +----------- + + + +LLaVA connects pre-trained `CLIP +ViT-L/14 `__ visual encoder and large +language model like Vicuna, LLaMa v2 or MPT, using a simple projection +matrix + +.. figure:: https://llava-vl.github.io/images/llava_arch.png + :alt: vlp_matrix.png + + vlp_matrix.png + +Model training procedure consists of 2 stages: + +- Stage 1: Pre-training for Feature Alignment. Only the projection + matrix is updated, based on a subset of CC3M. +- Stage 2: Fine-tuning End-to-End.. Both the projection matrix and LLM + are updated for two different use scenarios: + + - Visual Chat: LLaVA is fine-tuned on our generated multimodal + instruction-following data for daily user-oriented applications. + - Science QA: LLaVA is fine-tuned on this multimodal reasoning + dataset for the science domain. + +More details about model can be found in original `project +web-page `__, +`paper `__ and +`repo `__. + +Prerequisites +------------- + + + +Install required dependencies + +.. code:: ipython3 + + %pip install -q "torch>=2.1.0" "torchvision" "torchaudio" --index-url https://download.pytorch.org/whl/cpu + %pip install -q "git+https://github.com/eaidova/optimum-intel.git@ea/minicpmv" + %pip install -q "nncf>=2.13.0" "sentencepiece" "tokenizers>=0.12.1" "transformers>=4.45.0" "gradio>=4.36" + %pip install -q -U --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly openvino_tokenizers openvino openvino-genai + +Convert and Optimize Model +-------------------------- + + + +Our model conversion and optimization consist of following steps: 1. +Download original PyTorch model. 2. Convert model to OpenVINO format. 3. 
+Compress model weights using NNCF. + +Let’s consider each step more deeply. + +Convert model to OpenVINO IR format using Optimum CLI +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +OpenVINO supports PyTorch models via conversion to OpenVINO Intermediate +Representation format. For convenience, we will use OpenVINO integration +with HuggingFace Optimum. `Optimum +Intel `__ is the +interface between the Transformers and Diffusers libraries and the +different tools and libraries provided by Intel to accelerate end-to-end +pipelines on Intel architectures. + +Among other use cases, Optimum Intel provides a simple interface to +optimize your Transformers and Diffusers models, convert them to the +OpenVINO Intermediate Representation (IR) format and run inference using +OpenVINO Runtime. ``optimum-cli`` provides a command line interface for +model conversion and optimization. + +General command format: + +.. code:: bash + + optimum-cli export openvino --model <model_id> --task <task> <output_dir> + +where ``task`` is the task to export the model for; if not specified, the task +will be auto-inferred based on the model. You can find a mapping between +tasks and model classes in the Optimum TaskManager +`documentation `__. +Additionally, you can specify weights compression using the +``--weight-format`` argument with one of the following options: ``fp32``, +``fp16``, ``int8`` and ``int4``. For ``int8`` and ``int4``, +`nncf `__ will be used for +weight compression. More details about model export are provided in the `Optimum +Intel +documentation `__. + +.. code:: ipython3 + + from pathlib import Path + + model_id = "llava-hf/llava-1.5-7b-hf" + model_path = Path(model_id.split("/")[-1]) / "FP16" + + if not model_path.exists(): + !optimum-cli export openvino --model {model_id} --weight-format fp16 {model_path} + +Compress Model weights to 4 and 8 bits using NNCF +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +For reducing memory consumption, weight compression optimization can be +applied using `NNCF `__. Weight +compression aims to reduce the memory footprint of a model. It can also +lead to significant performance improvement for large memory-bound +models, such as Large Language Models (LLMs). LLMs and other models, +which require extensive memory to store the weights during inference, +can benefit from weight compression in the following ways: + +- enabling the inference of exceptionally large models that cannot be + accommodated in the memory of the device; + +- improving the inference performance of the models by reducing the + latency of the memory access when computing the operations with + weights, for example, Linear layers. + +`Neural Network Compression Framework +(NNCF) `__ provides 4-bit / +8-bit mixed weight quantization as a compression method primarily +designed to optimize LLMs. The main difference between weights +compression and full model quantization (post-training quantization) is +that activations remain floating-point in the case of weights +compression, which leads to better accuracy. Weight compression for +LLMs provides a solid inference performance improvement which is on par +with the performance of the full model quantization. In addition, weight +compression is data-free and does not require a calibration dataset, +making it easy to use. + +The ``nncf.compress_weights`` function can be used for performing weights +compression. The function accepts an OpenVINO model and other +compression parameters.
Compared to INT8 compression, INT4 compression +improves performance even more, but introduces a minor drop in +prediction quality. + +More details about weight compression can be found in the `OpenVINO +documentation `__. + +.. code:: ipython3 + + import ipywidgets as widgets + + compression_mode = widgets.Dropdown( + options=["INT4", "INT8"], + value="INT4", + description="Compression mode:", + disabled=False, + ) + + compression_mode + + + + +.. parsed-literal:: + + Dropdown(description='Compression mode:', options=('INT4', 'INT8'), value='INT4') + + + +.. code:: ipython3 + + import shutil + import nncf + import openvino as ov + import gc + + core = ov.Core() + + + def compress_model_weights(precision): + int4_compression_config = {"mode": nncf.CompressWeightsMode.INT4_ASYM, "group_size": 128, "ratio": 1, "all_layers": True} + int8_compression_config = {"mode": nncf.CompressWeightsMode.INT8_ASYM} + + compressed_model_path = model_path.parent / precision + + if not compressed_model_path.exists(): + ov_model = core.read_model(model_path / "openvino_language_model.xml") + compression_config = int4_compression_config if precision == "INT4" else int8_compression_config + compressed_ov_model = nncf.compress_weights(ov_model, **compression_config) + ov.save_model(compressed_ov_model, compressed_model_path / "openvino_language_model.xml") + del compressed_ov_model + del ov_model + gc.collect() + for file_name in model_path.glob("*"): + if file_name.name in ["openvino_language_model.xml", "openvino_language_model.bin"]: + continue + shutil.copy(file_name, compressed_model_path) + + + compress_model_weights(compression_mode.value) + + +.. parsed-literal:: + + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino + + +Prepare OpenVINO based inference pipeline +----------------------------------------- + + + +OpenVINO integration with Optimum Intel provides a ready-to-use API for +model inference that allows smooth integration with +transformers-based solutions. For loading the LLaVA model, we will use the +``OVModelForVisualCausalLM`` class, which has an interface compatible with the +Transformers LLaVA implementation. To load a model, the +``from_pretrained`` method should be used. It accepts a path to the model +directory or a model_id from the HuggingFace hub (if the model is not converted to +OpenVINO format, conversion will be triggered automatically). +Additionally, we can provide an inference device, a quantization config +(if the model has not been quantized yet) and a device-specific OpenVINO +Runtime configuration. More details about model inference with Optimum +Intel can be found in the +`documentation `__. + +.. code:: ipython3 + + from optimum.intel.openvino import OVModelForVisualCausalLM + +Run model inference +------------------- + + + +Now that we have the model and have defined the generation pipeline, we can run +model inference. + +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~ + + + +Select the device from the dropdown list to run inference using OpenVINO. + +.. code:: ipython3 + + import requests + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) + + from notebook_utils import device_widget + + device = device_widget(exclude=["NPU"]) + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + + + +Select model variant +~~~~~~~~~~~~~~~~~~~~ + + + +..
code:: ipython3 + + model_base_path = model_path.parent + available_models = [] + + for precision in ["INT4", "INT8", "FP16"]: + if (model_base_path / precision).exists(): + available_models.append(precision) + + model_variant = widgets.Dropdown( + options=available_models, + value=available_models[0], + description="Compression mode:", + disabled=False, + ) + + model_variant + + + + +.. parsed-literal:: + + Dropdown(description='Compression mode:', options=('INT4', 'FP16'), value='INT4') + + + +Load OpenVINO model +~~~~~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + ov_model = OVModelForVisualCausalLM.from_pretrained(model_base_path / model_variant.value, device=device.value) + +Prepare input data +~~~~~~~~~~~~~~~~~~ + + + +For preparing input data, we will use the model's tokenizer and image +processor. For alignment with the original PyTorch implementation, we +will use PyTorch tensors as input. + +.. code:: ipython3 + + import requests + from PIL import Image + from io import BytesIO + from transformers import AutoProcessor, AutoConfig + + config = AutoConfig.from_pretrained(model_path) + + processor = AutoProcessor.from_pretrained( + model_path, patch_size=config.vision_config.patch_size, vision_feature_select_strategy=config.vision_feature_select_strategy + ) + + + def load_image(image_file): + if image_file.startswith("http") or image_file.startswith("https"): + response = requests.get(image_file) + image = Image.open(BytesIO(response.content)).convert("RGB") + else: + image = Image.open(image_file).convert("RGB") + return image + + + image_file = "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11" + text_message = "What is unusual on this image?" + + image = load_image(image_file) + + conversation = [ + { + "role": "user", + "content": [ + {"type": "text", "text": text_message}, + {"type": "image"}, + ], + }, + ] + + prompt = processor.apply_chat_template(conversation, add_generation_prompt=True) + + inputs = processor(images=image, text=prompt, return_tensors="pt") + +Test model inference +~~~~~~~~~~~~~~~~~~~~ + + + +Generating a long response may be time consuming. To access partial +results as soon as they are generated, without waiting for the whole +process to finish, the Streaming API can be used. Token streaming is the mode +in which the generative system returns the tokens one by one as the +model generates them. This enables showing progressive generations to +the user rather than waiting for the whole generation. Streaming is an +essential aspect of the end-user experience as it reduces latency, one +of the most critical aspects of a smooth experience. You can find more +details about how streaming works in the `HuggingFace +documentation `__. + +Also, to simplify preparing the input in conversational mode, we +use the chat template helper provided by the model authors for +accumulating the history of provided messages and images. + +.. code:: ipython3 + + from transformers import TextStreamer + + # Prepare the streamer for printing generated tokens as they appear + streamer = TextStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True) + display(image) + print(f"Question: {text_message}") + print("Answer:") + + output_ids = ov_model.generate( + **inputs, + do_sample=False, + max_new_tokens=50, + streamer=streamer, + ) + + + +.. image:: llava-multimodal-chatbot-optimum-with-output_files/llava-multimodal-chatbot-optimum-with-output_20_0.png + + +.. parsed-literal:: + + Question: What is unusual on this image?
+ Answer: + The unusual aspect of this image is that a cat is lying inside a cardboard box, which is not a typical place for a cat to rest. Cats are known for their curiosity and love for small, enclosed spaces, but in this case + + +Interactive demo +---------------- + + + +.. code:: ipython3 + + if not Path("gradio_helper.py").exists(): + r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/llava-multimodal-chatbot/gradio_helper.py") + open("gradio_helper.py", "w").write(r.text) + + from gradio_helper import make_demo_llava_optimum + + demo = make_demo_llava_optimum(ov_model, processor) + + try: + demo.launch(debug=False) + except Exception: + demo.launch(share=True, debug=False) + # if you are launching remotely, specify server_name and server_port + # demo.launch(server_name='your server name', server_port='server port in int') + # Read more in the docs: https://gradio.app/docs/ diff --git a/docs/notebooks/llava-multimodal-chatbot-optimum-with-output_files/llava-multimodal-chatbot-optimum-with-output_20_0.jpg b/docs/notebooks/llava-multimodal-chatbot-optimum-with-output_files/llava-multimodal-chatbot-optimum-with-output_20_0.jpg new file mode 100644 index 00000000000000..c6aeec77cd3cb2 --- /dev/null +++ b/docs/notebooks/llava-multimodal-chatbot-optimum-with-output_files/llava-multimodal-chatbot-optimum-with-output_20_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fc0d22d75f23474fb4f8aec8c0bf0fdf5d9377f3379e82a3887003e6da47e7e +size 60425 diff --git a/docs/notebooks/llava-multimodal-chatbot-optimum-with-output_files/llava-multimodal-chatbot-optimum-with-output_20_0.png b/docs/notebooks/llava-multimodal-chatbot-optimum-with-output_files/llava-multimodal-chatbot-optimum-with-output_20_0.png new file mode 100644 index 00000000000000..c6673a757ab5dc --- /dev/null +++ b/docs/notebooks/llava-multimodal-chatbot-optimum-with-output_files/llava-multimodal-chatbot-optimum-with-output_20_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c715d8adee4bf7519690de20b57ef2edaa2f914c86a64d107f99a919dcdad218 +size 854224 diff --git a/docs/notebooks/llava-multimodal-chatbot-with-output.rst b/docs/notebooks/llava-multimodal-chatbot-with-output.rst deleted file mode 100644 index fde37625041d43..00000000000000 --- a/docs/notebooks/llava-multimodal-chatbot-with-output.rst +++ /dev/null @@ -1,1342 +0,0 @@ -Visual-language assistant with LLaVA and OpenVINO -================================================= - -`LLaVA `__ (Large Language and Vision -Assistant) is large multimodal model that aims to develop a -general-purpose visual assistant that can follow both language and image -instructions to complete various real-world tasks. The idea is to -combine the power of large language models (LLMs) with vision encoders -like CLIP to create an end-to-end trained neural assistant that -understands and acts upon multimodal instructions. - -In the field of artificial intelligence, the goal is to create a -versatile assistant capable of understanding and executing tasks based -on both visual and language inputs. Current approaches often rely on -large vision models that solve tasks independently, with language only -used to describe image content. While effective, these models have fixed -interfaces with limited interactivity and adaptability to user -instructions. On the other hand, large language models (LLMs) have shown -promise as a universal interface for general-purpose assistants. 
By -explicitly representing various task instructions in language, these -models can be guided to switch and solve different tasks. To extend this -capability to the multimodal domain, the `LLaVA -paper `__ introduces \`visual -instruction-tuning, a novel approach to building a general-purpose -visual assistant. - -In this tutorial we consider how to use LLaVA model to build multimodal -chatbot. For demonstration purposes we will use -`LLaVA-Lightning-MPT-7B-preview `__ -model for conversion, similar steps required to run other models from -`LLaVA Model -Zoo `__. - -The tutorial consists from following steps: - -- Install prerequisites -- Prepare input processor and tokenizer -- Download original model -- Compress model weights to 4 and 8 bits using NNCF -- Convert model to OpenVINO Intermediate Representation (IR) format -- Prepare OpenVINO-based inference pipeline -- Run OpenVINO model - - -**Table of contents:** - - -- `About model <#about-model>`__ -- `Prerequisites <#prerequisites>`__ -- `Build model tokenizer and image - processor <#build-model-tokenizer-and-image-processor>`__ -- `Build model and convert it to OpenVINO IR - format <#build-model-and-convert-it-to-openvino-ir-format>`__ - - - `Prepare helpers for model - conversion <#prepare-helpers-for-model-conversion>`__ - - `Convert and Optimize Model <#convert-and-optimize-model>`__ - - - `Instantiate PyTorch model <#instantiate-pytorch-model>`__ - - `Compress Model weights to 4 and 8 bits using - NNCF <#compress-model-weights-to-4-and-8-bits-using-nncf>`__ - - `Convert model to OpenVINO IR - format <#convert-model-to-openvino-ir-format>`__ - -- `Prepare OpenVINO based inference - pipeline <#prepare-openvino-based-inference-pipeline>`__ -- `Run model inference <#run-model-inference>`__ - - - `Select inference device <#select-inference-device>`__ - - `Load OpenVINO model <#load-openvino-model>`__ - - `Prepare input data <#prepare-input-data>`__ - - `Test model inference <#test-model-inference>`__ - -- `Interactive demo <#interactive-demo>`__ - -Installation Instructions -~~~~~~~~~~~~~~~~~~~~~~~~~ - -This is a self-contained example that relies solely on its own code. - -We recommend running the notebook in a virtual environment. You only -need a Jupyter server to start. For details, please refer to -`Installation -Guide `__. - -About model ------------ - - - -LLaVA connects pre-trained `CLIP -ViT-L/14 `__ visual encoder and large -language model like Vicuna, LLaMa v2 or MPT, using a simple projection -matrix - -.. figure:: https://llava-vl.github.io/images/llava_arch.png - :alt: vlp_matrix.png - - vlp_matrix.png - -Model training procedure consists of 2 stages: - -- Stage 1: Pre-training for Feature Alignment. Only the projection - matrix is updated, based on a subset of CC3M. -- Stage 2: Fine-tuning End-to-End.. Both the projection matrix and LLM - are updated for two different use scenarios: - - - Visual Chat: LLaVA is fine-tuned on our generated multimodal - instruction-following data for daily user-oriented applications. - - Science QA: LLaVA is fine-tuned on this multimodal reasoning - dataset for the science domain. - -More details about model can be found in original `project -web-page `__, -`paper `__ and -`repo `__. - -Prerequisites -------------- - - - -Install required dependencies - -.. 
code:: ipython3 - - import sys - - %pip install -q "torch>=2.1.0" "torchvision" "torchaudio" --index-url https://download.pytorch.org/whl/cpu - %pip install -q "openvino>=2023.2.0" "nncf>=2.7.0" "sentencepiece" "tokenizers>=0.12.1" "transformers>=4.37.2" "gradio>=4.19" "einops" - -.. code:: ipython3 - - from pathlib import Path - - repo_dir = Path("LLaVA") - - if not repo_dir.exists(): - !git clone https://github.com/haotian-liu/LLaVA.git - - sys.path.insert(0, str(repo_dir.resolve())) - -Build model tokenizer and image processor ------------------------------------------ - - - -For starting work with model, we need understand how to prepare input -data first. As it is already discussed before, LLaVA is multimodal model -that accepts input user instructions in text format and image for -analysis. In the same time, LLaVA is combination of 2 fundamental -pretrained models for text and image processing, CLIP and MPT, each of -them has own approach for preparing data - tokenization for input text -and preprocessing for input image. LLaVA reuses these steps with small -adoption: introduced special tokens that serves for specification of -image location in the text that should be injected in provided user -instruction. - -.. code:: ipython3 - - from transformers import AutoTokenizer, AutoConfig, CLIPImageProcessor - from llava.model.language_model.llava_mpt import LlavaMptForCausalLM - - model_id = "liuhaotian/LLaVA-Lightning-MPT-7B-preview" - - config = AutoConfig.from_pretrained(model_id) - tokenizer = AutoTokenizer.from_pretrained(model_id) - image_processor = CLIPImageProcessor.from_pretrained(config.mm_vision_tower) - - -.. parsed-literal:: - - Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. - - -.. code:: ipython3 - - from llava.constants import ( - DEFAULT_IMAGE_PATCH_TOKEN, - DEFAULT_IM_START_TOKEN, - DEFAULT_IM_END_TOKEN, - DEFAULT_IMAGE_TOKEN, - ) - - mm_use_im_start_end = getattr(config, "mm_use_im_start_end", False) - mm_use_im_patch_token = getattr(config, "mm_use_im_patch_token", True) - if mm_use_im_patch_token: - tokenizer.add_tokens([DEFAULT_IMAGE_PATCH_TOKEN], special_tokens=True) - if mm_use_im_start_end: - tokenizer.add_tokens([DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN], special_tokens=True) - - if hasattr(config, "max_sequence_length"): - context_len = config.max_sequence_length - else: - context_len = 2048 - -Build model and convert it to OpenVINO IR format ------------------------------------------------- - - - -LLaVA is autoregressive transformer generative model, it means that each -next model step depends from model output from previous step. The -generation approach is based on the assumption that the probability -distribution of a word sequence can be decomposed into the product of -conditional next word distributions. In other words, model predicts the -next token in the loop guided by previously generated tokens until the -stop-condition will be not reached (generated sequence of maximum length -or end of string token obtained). The way the next token will be -selected over predicted probabilities is driven by the selected decoding -methodology. You can find more information about the most popular -decoding methods in this -`blog `__. The entry point -for the generation process for models from the Hugging Face Transformers -library is the ``generate`` method. You can find more information about -its parameters and configuration in the -`documentation `__. 
-To preserve flexibility in the selection decoding methodology, we will -convert only model inference for one step. - -The inference flow has difference on first step and for the next. On the -first step, model accept preprocessed input instruction and image, that -transformed to the unified embedding space using ``token_embedding`` and -``image_encoder`` models, after that LLM-based part of model runs on -input embeddings to predict probability of next generated tokens. On the -next step, model accepts only next token id selected based on sampling -strategy and cached attention key and values. Since the output side is -auto-regressive, an output token hidden state remains the same once -computed for every further generation step. Therefore, recomputing it -every time you want to generate a new token seems wasteful. With the -cache, the model saves the hidden state once it has been computed. The -model only computes the one for the most recently generated output token -at each time step, re-using the saved ones for hidden tokens. This -reduces the generation complexity from :math:`O(n^3)` to :math:`O(n^2)` -for a transformer model. More details about how it works can be found in -this -`article `__. - -Prepare helpers for model conversion -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -The code below prepares function for converting LLaVA model to OpenVINO -Intermediate Representation format. It splits model on parts described -above, prepare example inputs for each part and convert each part using -`OpenVINO Model Conversion -API `__. -``ov.convert_model`` function accepts PyTorch model instance and returns -``ov.Model`` object that represent model in OpenVINO format. It is ready -to use for loading on device using ``ov.compile_model`` or can be saved -on disk using ``ov.save_model``. - -.. code:: ipython3 - - from functools import wraps - import gc - import warnings - import torch - import openvino as ov - import nncf - from typing import Optional, Tuple, List - import torch.nn.functional as F - - warnings.filterwarnings("ignore") - - - class ModelWrapper(torch.nn.Module): - """ - Model wrapper class for export for spliting original forward logic on preparing multimodal data and inference using it. - That allows us to sperate image encoder and token embeddings model from general flow. - """ - - def __init__(self, model): - super().__init__() - self.model = model - - def forward( - self, - input_ids: torch.LongTensor = None, - past_key_values: Optional[List[torch.FloatTensor]] = None, - inputs_embeds: Optional[torch.FloatTensor] = None, - attention_mask: Optional[torch.Tensor] = None, - ): - outputs = self.model.transformer( - input_ids=input_ids, - inputs_embeds=inputs_embeds, - past_key_values=past_key_values, - attention_mask=attention_mask, - return_dict=True, - output_attentions=False, - output_hidden_states=False, - use_cache=True, - ) - logits = F.linear( - outputs.last_hidden_state.to(self.model.transformer.wte.weight.device), - self.model.transformer.wte.weight.to(outputs.last_hidden_state.dtype), - ) - - return (logits, tuple(outputs.past_key_values)) - - - def patch_model_forward(model): - """ - Helper function for patching model forward for model with past. 
- It makes model more convinient for export to TorchScript format avoiding limitation - that list of tensors can not be correctly traced as model input - """ - - orig_forward = model.forward - - @wraps(orig_forward) - def ts_patched_forward( - input_ids: torch.Tensor, - past_key_values: Tuple[Tuple[torch.Tensor]], - attention_mask: torch.LongTensor, - ): - pkv_list = list(past_key_values) - outs = orig_forward( - input_ids=input_ids, - past_key_values=pkv_list, - attention_mask=attention_mask, - ) - return outs - - model.forward = ts_patched_forward - return model - - - def flattenize_inputs(inputs): - """ - Helper function for making nested inputs flattens - """ - flatten_inputs = [] - for input_data in inputs: - if input_data is None: - continue - if isinstance(input_data, (list, tuple)): - flatten_inputs.extend(flattenize_inputs(input_data)) - else: - flatten_inputs.append(input_data) - return flatten_inputs - - - def cleanup_torchscript_cache(): - """ - Helper for removing cached model representation - """ - torch._C._jit_clear_class_registry() - torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore() - torch.jit._state._clear_class_state() - - - def postprocess_converted_model( - ov_model, - example_input=None, - input_names=None, - output_names=None, - dynamic_shapes=None, - ): - """ - Helper function for appling postprocessing on converted model with updating input names, shapes and output names - acording to requested specification - """ - flatten_example_inputs = flattenize_inputs(example_input) if example_input else [] - - if input_names: - for inp_name, m_input, input_data in zip(input_names, ov_model.inputs, flatten_example_inputs): - input_node = m_input.get_node() - if input_node.element_type == ov.Type.dynamic: - m_input.get_node().set_element_type(ov.Type.f32) - shape = list(input_data.shape) - if dynamic_shapes is not None and inp_name in dynamic_shapes: - for k in dynamic_shapes[inp_name]: - shape[k] = -1 - input_node.set_partial_shape(ov.PartialShape(shape)) - m_input.get_tensor().set_names({inp_name}) - - if output_names: - for out, out_name in zip(ov_model.outputs, output_names): - out.get_tensor().set_names({out_name}) - ov_model.validate_nodes_and_infer_types() - return ov_model - - - def convert_llava_mpt( - pt_model: torch.nn.Module, - model_path: Path, - image_encoder_wc_parameters: Optional[dict] = None, - llava_wc_parameters: Optional[dict] = None, - ): - """ - LLaVA MPT model conversion function - - Params: - pt_model: PyTorch model - model_path: path for saving model - Returns: - None - """ - ov_out_path = Path(model_path) - pt_model.config.save_pretrained(ov_out_path) - pt_model.config.use_cache = True - pt_model.config.torchscript = True - first_stage_model_path = ov_out_path / "llava_input_embed.xml" - image_encoder_path = ov_out_path / "image_encoder.xml" - token_embedding_model_path = ov_out_path / "token_embed.xml" - second_stage_model_path = ov_out_path / "llava_with_past.xml" - if not image_encoder_path.exists(): - model.forward = model.encode_images - ov_model = ov.convert_model( - model, - example_input=torch.zeros((1, 3, 224, 224)), - input=[(-1, 3, 224, 224)], - ) - if image_encoder_wc_parameters is not None: - print("Applying weight compression to image encoder") - ov_model = nncf.compress_weights(ov_model, **image_encoder_wc_parameters) - ov.save_model(ov_model, image_encoder_path) - cleanup_torchscript_cache() - del ov_model - gc.collect() - print("Image Encoder model successfully converted") - - if not 
token_embedding_model_path.exists(): - model.forward = model.get_model().embed_tokens - ov_model = ov.convert_model(model, example_input=torch.ones((1, 10), dtype=torch.long)) - ov.save_model(ov_model, token_embedding_model_path) - cleanup_torchscript_cache() - del ov_model - gc.collect() - print("Token Embedding model successfully converted") - - if first_stage_model_path.exists() and second_stage_model_path.exists(): - print("LLaVA model successfully converted") - del pt_model - return - model_wrap = ModelWrapper(model) - example_input_first_stage = { - "inputs_embeds": torch.zeros((1, 307, 4096)), - "attention_mask": torch.ones((1, 307), dtype=torch.long), - } - outs = model_wrap(**example_input_first_stage) - inputs = ["input_ids"] - outputs = ["logits"] - dynamic_shapes = {"input_ids": {1: "seq_len"}, "attention_mask": {1: "seq_len"}} - for idx in range(len(outs[1])): - inputs.extend([f"past_key_values.{idx}.key", f"past_key_values.{idx}.value"]) - dynamic_shapes[inputs[-1]] = {2: "past_sequence + sequence"} - dynamic_shapes[inputs[-2]] = {2: "past_sequence + sequence"} - outputs.extend([f"present.{idx}.key", f"present.{idx}.value"]) - - inputs.extend(["attention_mask"]) - if not first_stage_model_path.exists(): - ov_model = ov.convert_model(model_wrap, example_input=example_input_first_stage) - ov_model = postprocess_converted_model(ov_model, output_names=outputs) - if llava_wc_parameters is not None: - print("Applying weight compression to first stage LLava model") - ov_model = nncf.compress_weights(ov_model, **llava_wc_parameters) - ov.save_model(ov_model, first_stage_model_path) - cleanup_torchscript_cache() - del ov_model - gc.collect() - - if not second_stage_model_path.exists(): - model_wrap = patch_model_forward(model_wrap) - example_input_second_stage = { - "input_ids": torch.ones((1, 1), dtype=torch.long), - "past_key_values": outs[1], - "attention_mask": torch.ones((1, outs[1][-1][-1].shape[-2] + 1), dtype=torch.long), - } - ov_model = ov.convert_model(model_wrap, example_input=example_input_second_stage) - ov_model = postprocess_converted_model( - ov_model, - example_input=example_input_second_stage.values(), - input_names=inputs, - output_names=outputs, - dynamic_shapes=dynamic_shapes, - ) - if llava_wc_parameters is not None: - print("Applying weight compression to second stage LLava model") - ov_model = nncf.compress_weights(ov_model, **llava_wc_parameters) - ov.save_model(ov_model, second_stage_model_path) - cleanup_torchscript_cache() - del ov_model - gc.collect() - print("LLaVA model successfully converted") - del model_wrap - del pt_model - - -.. parsed-literal:: - - INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino - - -Convert and Optimize Model -~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -Our model conversion and optimization consist of following steps: 1. -Download original PyTorch model. 2. Compress model weights using NNCF 3. -Convert model to OpenVINO format and save it on disk. - -Let’s consider each step more deeply. - -Instantiate PyTorch model -^^^^^^^^^^^^^^^^^^^^^^^^^ - - - -For creating PyTorch model we should use ``from_pretrained`` method of -``LlavaMPTForCausalLM`` model class. Model weights will be downloaded -from `HuggingFace hub `__ during first -run. It may takes some time and requires at least 13 Gb free space on -disk. 
- -Compress Model weights to 4 and 8 bits using NNCF -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - - -For reducing memory consumption, weights compression optimization can be -applied using `NNCF `__. Weight -compression aims to reduce the memory footprint of a model. It can also -lead to significant performance improvement for large memory-bound -models, such as Large Language Models (LLMs). LLMs and other models, -which require extensive memory to store the weights during inference, -can benefit from weight compression in the following ways: - -- enabling the inference of exceptionally large models that cannot be - accommodated in the memory of the device; - -- improving the inference performance of the models by reducing the - latency of the memory access when computing the operations with - weights, for example, Linear layers. - -`Neural Network Compression Framework -(NNCF) `__ provides 4-bit / -8-bit mixed weight quantization as a compression method primarily -designed to optimize LLMs. The main difference between weights -compression and full model quantization (post-training quantization) is -that activations remain floating-point in the case of weights -compression which leads to a better accuracy. Weight compression for -LLMs provides a solid inference performance improvement which is on par -with the performance of the full model quantization. In addition, weight -compression is data-free and does not require a calibration dataset, -making it easy to use. - -``nncf.compress_weights`` function can be used for performing weights -compression. The function accepts an OpenVINO model and other -compression parameters. Compared to INT8 compression, INT4 compression -improves performance even more, but introduces a minor drop in -prediction quality. - -More details about weights compression, can be found in `OpenVINO -documentation `__. - - **Note**: There is no speedup for INT4 compressed models on dGPU. - -Convert model to OpenVINO IR format -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - - -Convert model to OpenVINO format using conversion helper function -defined above. - -Please select below whether you would like to run INT4 weight -compression instead of INT8 weight compression. - -.. code:: ipython3 - - import ipywidgets as widgets - - compression_mode = widgets.Dropdown( - options=["INT4", "INT8"], - value="INT4", - description="Compression mode:", - disabled=False, - ) - - compression_mode - - - - -.. parsed-literal:: - - Dropdown(description='Compression mode:', options=('INT4', 'INT8'), value='INT4') - - - -.. 
code:: ipython3 - - if compression_mode.value == "INT4": - compressed_model_dir = Path("llava-mpt/INT4_compressed_weights") - llava_wc_parameters = dict(mode=nncf.CompressWeightsMode.INT4_ASYM, group_size=128, ratio=0.8) - else: - compressed_model_dir = Path("llava-mpt/INT8_compressed_weights") - llava_wc_parameters = dict(mode=nncf.CompressWeightsMode.INT8) - - if not compressed_model_dir.exists(): - compressed_model_dir.mkdir(exist_ok=True, parents=True) - config.save_pretrained(compressed_model_dir) - model = LlavaMptForCausalLM.from_pretrained(model_id) - vision_tower = model.get_vision_tower() - if not vision_tower.is_loaded: - vision_tower.load_model() - - if mm_use_im_start_end: - model.resize_token_embeddings(len(tokenizer)) - - model.eval() - with torch.no_grad(): - convert_llava_mpt( - model, - compressed_model_dir, - image_encoder_wc_parameters=dict(mode=nncf.CompressWeightsMode.INT8), - llava_wc_parameters=llava_wc_parameters, - ) - del model - gc.collect(); - - - -.. parsed-literal:: - - Loading checkpoint shards: 0%| | 0/2 [00:00`__. - -.. code:: ipython3 - - from transformers.generation import GenerationConfig, GenerationMixin - from transformers.modeling_outputs import CausalLMOutputWithPast - from transformers import AutoConfig - import numpy as np - import torch - - - class OVLlavaMPTForCausalLM(GenerationMixin): - def __init__(self, core, model_dir, device): - self.image_encoder = core.compile_model(model_dir / "image_encoder.xml", device) - self.token_embed = core.compile_model(model_dir / "token_embed.xml", device) - self.model = core.read_model(model_dir / "llava_with_past.xml") - self.model_input_embed = core.compile_model(model_dir / "llava_input_embed.xml", device) - self.input_names = {key.get_any_name(): idx for idx, key in enumerate(self.model.inputs)} - self.output_names = {key.get_any_name(): idx for idx, key in enumerate(self.model.outputs)} - self.key_value_input_names = [key for key in self.input_names if "key_values" in key] - self.key_value_output_names = [key for key in self.output_names if "present" in key] - compiled_model = core.compile_model(self.model, device) - self.request = compiled_model.create_infer_request() - self.config = AutoConfig.from_pretrained(model_dir) - self.generation_config = GenerationConfig.from_model_config(config) - self.main_input_name = "input_ids" - self.device = torch.device("cpu") - self.num_pkv = 2 - self._supports_cache_class = False - - def can_generate(self): - """Returns True to validate the check that the model using `GenerationMixin.generate()` can indeed generate.""" - return True - - def __call__( - self, - input_ids: torch.LongTensor, - images: torch.Tensor, - attention_mask: Optional[torch.LongTensor] = None, - prefix_mask: Optional[torch.LongTensor] = None, - past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, - **kwargs, - ) -> CausalLMOutputWithPast: - return self.forward(input_ids, images, attention_mask, prefix_mask, past_key_values) - - def forward( - self, - input_ids: torch.LongTensor, - images: torch.Tensor, - attention_mask: Optional[torch.LongTensor] = None, - prefix_mask: Optional[torch.LongTensor] = None, - past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, - **kwargs, - ) -> CausalLMOutputWithPast: - """General inference method""" - inputs = {} - if past_key_values is not None: - # Flatten the past_key_values - attention_mask = torch.ones( - (input_ids.shape[0], past_key_values[-1][-1].shape[-2] + 1), - dtype=input_ids.dtype, - ) - past_key_values = 
tuple(past_key_value for pkv_per_layer in past_key_values for past_key_value in pkv_per_layer) - # Add the past_key_values to the decoder inputs - inputs = dict(zip(self.key_value_input_names, past_key_values)) - - else: - return self.forward_with_image(input_ids, images, attention_mask) - inputs["input_ids"] = np.array(input_ids) - - if "attention_mask" in self.input_names: - inputs["attention_mask"] = np.array(attention_mask) - - # Run inference - self.request.start_async(inputs, share_inputs=True) - self.request.wait() - - logits = torch.from_numpy(self.request.get_tensor("logits").data) - - # Tuple of length equal to : number of layer * number of past_key_value per decoder layer (2 corresponds to the self-attention layer) - past_key_values = tuple(self.request.get_tensor(key).data for key in self.key_value_output_names) - # Tuple of tuple of length `n_layers`, with each tuple of length equal to 2 (k/v of self-attention) - - past_key_values = tuple(past_key_values[i : i + self.num_pkv] for i in range(0, len(past_key_values), self.num_pkv)) - return CausalLMOutputWithPast(logits=logits, past_key_values=past_key_values) - - def forward_with_image(self, input_ids, images, attention_mask): - """First step inference method, that resolves multimodal data""" - input_embed, attention_mask = self.prepare_multimodal_input(input_ids, images, attention_mask) - outs = self.model_input_embed([input_embed, attention_mask]) - logits = outs[0] - pkv = list(outs.values())[1:] - pkv = tuple(pkv[i : i + self.num_pkv] for i in range(0, len(pkv), self.num_pkv)) - return CausalLMOutputWithPast(logits=torch.from_numpy(logits), past_key_values=pkv) - - def prepare_multimodal_input(self, input_ids, images, attention_mask): - """Preprocessing function for embedding multimodal data""" - image_features = [] - if images is not None: - image_features = self.image_encoder(images)[0] - - new_input_embeds = [] - cur_image_idx = 0 - for batch_idx, cur_input_ids in enumerate(input_ids): - if (cur_input_ids == IMAGE_TOKEN_INDEX).sum() == 0: - # multimodal LLM, but the current sample is not multimodal - cur_input_embeds = torch.from_numpy(self.token_embed(cur_input_ids.unsqueeze(0))[0][0]) - new_input_embeds.append(cur_input_embeds) - cur_image_idx += 1 - continue - image_token_indices = torch.where(cur_input_ids == IMAGE_TOKEN_INDEX)[0] - cur_new_input_embeds = [] - while image_token_indices.numel() > 0: - cur_image_features = image_features[cur_image_idx] - image_token_start = image_token_indices[0] - if getattr(self.config, "tune_mm_mlp_adapter", False) and getattr(self.config, "mm_use_im_start_end", False): - embd = self.token_embed(cur_input_ids[: image_token_start - 1].unsqueeze(0))[0][0] - cur_new_input_embeds.append(embd) - embd = self.token_embed(cur_input_ids[image_token_start - 1 : image_token_start].unsqueeze(0))[0][0] - cur_new_input_embeds.append(embd) - cur_new_input_embeds.append(cur_image_features) - embd = self.token_embed(cur_input_ids[image_token_start + 1 : image_token_start + 2].unsqueeze(0))[0][0] - cur_new_input_embeds.append(embd) - else: - cur_new_input_embeds.append(self.token_embed(cur_input_ids[:image_token_start].unsqueeze(0))[0][0]) - cur_new_input_embeds.append(cur_image_features) - cur_image_idx += 1 - if getattr(self.config, "tune_mm_mlp_adapter", False) and getattr(self.config, "mm_use_im_start_end", False): - cur_input_ids = cur_input_ids[image_token_start + 2 :] - else: - cur_input_ids = cur_input_ids[image_token_start + 1 :] - image_token_indices = torch.where(cur_input_ids == 
IMAGE_TOKEN_INDEX)[0] - if cur_input_ids.numel() > 0: - if getattr(self.config, "tune_mm_mlp_adapter", False) and getattr(self.config, "mm_use_im_start_end", False): - cur_new_input_embeds.append(self.token_embed(cur_input_ids.unsqueeze(0))[0][0]) - else: - cur_new_input_embeds.append(self.token_embed(cur_input_ids.unsqueeze(0))[0][0]) - cur_new_input_embeds = [torch.from_numpy(x) for x in cur_new_input_embeds] - cur_new_input_embeds = torch.cat(cur_new_input_embeds, dim=0) - new_input_embeds.append(cur_new_input_embeds) - - if any(x.shape != new_input_embeds[0].shape for x in new_input_embeds): - max_len = max(x.shape[0] for x in new_input_embeds) - - new_input_embeds_align = [] - for cur_new_embed in new_input_embeds: - cur_new_embed = torch.cat( - ( - cur_new_embed, - torch.zeros( - (max_len - cur_new_embed.shape[0], cur_new_embed.shape[1]), - dtype=cur_new_embed.dtype, - ), - ), - dim=0, - ) - new_input_embeds_align.append(cur_new_embed) - new_input_embeds = torch.stack(new_input_embeds_align, dim=0) - - if attention_mask is not None: - new_attention_mask = [] - for cur_attention_mask, cur_new_labels, cur_new_labels_align in zip(attention_mask, _new_labels, new_labels): - new_attn_mask_pad_left = torch.full( - (cur_new_labels.shape[0] - labels.shape[1],), - True, - dtype=attention_mask.dtype, - ) - new_attn_mask_pad_right = torch.full( - (cur_new_labels_align.shape[0] - cur_new_labels.shape[0],), - False, - dtype=attention_mask.dtype, - ) - cur_new_attention_mask = torch.cat( - ( - new_attn_mask_pad_left, - cur_attention_mask, - new_attn_mask_pad_right, - ), - dim=0, - ) - new_attention_mask.append(cur_new_attention_mask) - attention_mask = torch.stack(new_attention_mask, dim=0) - assert attention_mask.shape == new_labels.shape - else: - new_input_embeds = torch.stack(new_input_embeds, dim=0) - - if attention_mask is not None: - new_attn_mask_pad_left = torch.full( - ( - attention_mask.shape[0], - new_input_embeds.shape[1] - input_ids.shape[1], - ), - True, - dtype=attention_mask.dtype, - ) - attention_mask = torch.cat((new_attn_mask_pad_left, attention_mask), dim=1) - assert attention_mask.shape == new_input_embeds.shape[:2] - - return new_input_embeds, attention_mask - - def prepare_inputs_for_generation(self, input_ids, past_key_values=None, **kwargs): - """ - This function is used during running GenerationMixin.generate for preparing model specific inputs for - each generation step - """ - past_len = 0 - if past_key_values is not None: - input_ids = input_ids[:, -1].unsqueeze(-1) - past_len = past_key_values[-1][-1].shape[-2] - attention_mask = kwargs.get( - "attention_mask", - torch.ones(input_ids.shape[0], input_ids.shape[1] + past_len), - ) - if not kwargs.get("use_cache", True): - raise NotImplementedError("MPT with prefix_lm=True does not support use_cache=False.") - else: - prefix_mask = None - return { - "input_ids": input_ids, - "attention_mask": attention_mask, - "prefix_mask": prefix_mask, - "past_key_values": past_key_values, - "images": kwargs.get("images", None), - } - - def _reorder_cache(self, past_key_values: Tuple[Tuple[torch.Tensor]], beam_idx: torch.Tensor) -> Tuple[Tuple[torch.Tensor]]: - """ - This function is used to re-order the `past_key_values` cache if [`~PreTrainedModel.beam_search`] or - [`~PreTrainedModel.beam_sample`] is called. - This is required to match `past_key_values` with the correct beam_idx at every generation step. 
- """ - - # from transformers.models.gpt2.modeling_gpt2.GPT2LMHeadModel._reorder_cache - return tuple(tuple(np.take(past_state, beam_idx, 0) for past_state in layer_past) for layer_past in past_key_values) - -Run model inference -------------------- - - - -Now, when we have model and defined generation pipeline, we can run -model inference. - -Select inference device -~~~~~~~~~~~~~~~~~~~~~~~ - - - -Select device from dropdown list for running inference using OpenVINO. - - **Note**: There is no speedup for INT4 compressed models on dGPU. - -.. code:: ipython3 - - import requests - - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - open("notebook_utils.py", "w").write(r.text) - - from notebook_utils import device_widget - - device = device_widget(exclude=["NPU"]) - - device - - - - -.. parsed-literal:: - - Dropdown(description='Device:', index=3, options=('CPU', 'GPU.0', 'GPU.1', 'AUTO'), value='AUTO') - - - -Load OpenVINO model -~~~~~~~~~~~~~~~~~~~ - - - -.. code:: ipython3 - - core = ov.Core() - - ov_model = OVLlavaMPTForCausalLM(core, compressed_model_dir, device.value) - -Prepare input data -~~~~~~~~~~~~~~~~~~ - - - -For preparing input data, we will use tokenizer and image processor -defined in the begging of our tutorial. For alignment with original -PyTorch implementation we will use PyTorch tensors as input. - -.. code:: ipython3 - - import requests - from PIL import Image - from io import BytesIO - - - def load_image(image_file): - if image_file.startswith("http") or image_file.startswith("https"): - response = requests.get(image_file) - image = Image.open(BytesIO(response.content)).convert("RGB") - else: - image = Image.open(image_file).convert("RGB") - return image - - - image_file = "https://llava-vl.github.io/static/images/view.jpg" - - image = load_image(image_file) - image_tensor = image_processor.preprocess(image, return_tensors="pt")["pixel_values"] - - text_message = "What are the things I should be cautious about when I visit here?" - print(f"Question: {text_message}") - image - - -.. parsed-literal:: - - Question: What are the things I should be cautious about when I visit here? - - - - -.. image:: llava-multimodal-chatbot-with-output_files/llava-multimodal-chatbot-with-output_20_1.png - - - -Test model inference -~~~~~~~~~~~~~~~~~~~~ - - - -Generation process for long response maybe time consuming, for accessing -partial result as soon as it is generated without waiting when whole -process finished, Streaming API can be used. Token streaming is the mode -in which the generative system returns the tokens one by one as the -model generates them. This enables showing progressive generations to -the user rather than waiting for the whole generation. Streaming is an -essential aspect of the end-user experience as it reduces latency, one -of the most critical aspects of a smooth experience. You can find more -details about how streaming work in `HuggingFace -documentation `__. - -Also for simplification of preparing input in conversational mode, we -will use Conversation Template helper provided by model authors for -accumulating history of provided messages and images. - -.. 
code:: ipython3 - - from llava.mm_utils import tokenizer_image_token, KeywordsStoppingCriteria - from llava.constants import IMAGE_TOKEN_INDEX - from transformers import TextStreamer - from llava.conversation import conv_templates, SeparatorStyle - - # Prepare - streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True) - conv_mode = "mpt" - - conv = conv_templates[conv_mode].copy() - roles = ("user", "assistant") - - if mm_use_im_start_end: - inp = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN + "\n" + text_message - else: - inp = DEFAULT_IMAGE_TOKEN + "\n" + text_message - conv.append_message(conv.roles[0], inp) - conv.append_message(conv.roles[1], None) - - prompt = conv.get_prompt() - input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0) - stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2 - keywords = [stop_str] - stopping_criteria = KeywordsStoppingCriteria(keywords, tokenizer, input_ids) - streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True) - print("Answer:") - - output_ids = ov_model.generate( - input_ids, - images=image_tensor, - do_sample=True, - temperature=0.2, - max_new_tokens=1024, - streamer=streamer, - use_cache=True, - stopping_criteria=[stopping_criteria], - ) - - -.. parsed-literal:: - - Answer: - When visiting this location, I should be cautious about the water level and the presence of boats. The image shows a dock with a boat in the water, and the water appears to be relatively shallow. It is essential to be mindful of the water depth when approaching the dock, as it could be dangerous to step into the water without checking the water level. Additionally, I should be aware of the boats in the water, as they could pose a risk if they are not properly secured or if they are not being used as intended. It is crucial to maintain a safe distance from the boats and follow any posted signs or guidelines to ensure a safe and enjoyable experience. - - -Interactive demo ----------------- - - - -.. code:: ipython3 - - from threading import Event, Thread - from transformers import TextIteratorStreamer - - conv = conv_templates[conv_mode].copy() - conv.messages = [] - - - def clear_history(textbox, imagebox, chatbot): - """ - callback function for clearing chat windows in interface on clear button click - - Params: - textbox: current textbox for user messages state - imagebox: current imagebox state - chatbot: current chatbot state - Returns: - empty textbox, imagebox and chatbot states - """ - conv.messages = [] - - return None, None, None - - - def handle_user_message(message, history): - """ - callback function for updating user messages in interface on submit button click - - Params: - message: current message - history: conversation history - Returns: - updated message and conversation history - """ - # Append the user's message to the conversation history - return "", history + [[message, ""]] - - - def run_chatbot(image, history, temperature=0.2, top_p=0.7, max_new_tokens=1024): - """ - callback function for running chatbot on submit button click - - Params: - history: conversation history - temperature: parameter for control the level of creativity in AI-generated text. - By adjusting the `temperature`, you can influence the AI model's probability distribution, making the text more focused or diverse. - top_p: parameter for control the range of tokens considered by the AI model based on their cumulative probability. 
- - """ - - text = history[-1][0] - if len(text) <= 0 and image is None: - conv.skip_next = True - yield history - text = text[:1536] # Hard cut-off - if image is not None: - text = text[:1200] # Hard cut-off for images - if "" not in text: - text = text + "\n" - text = (text, image, "Resize") - conv.append_message(conv.roles[0], text) - conv.append_message(conv.roles[1], None) - conv.skip_next = False - - # Construct the input message string for the model by concatenating the current system message and conversation history - prompt = conv.get_prompt() - image = conv.get_images(return_pil=True) - if not image: - image_tensor = None - else: - image_tensor = image_processor.preprocess(image, return_tensors="pt")["pixel_values"] - input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0) - stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2 - keywords = [stop_str] - stopping_criteria = KeywordsStoppingCriteria(keywords, tokenizer, input_ids) - # Tokenize the messages string - streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True) - generate_kwargs = dict( - input_ids=input_ids, - images=image_tensor, - max_new_tokens=max_new_tokens, - temperature=temperature, - do_sample=temperature > 0.001, - top_p=top_p, - streamer=streamer, - use_cache=True, - stopping_criteria=[stopping_criteria], - ) - - stream_complete = Event() - - def generate_and_signal_complete(): - """ - genration function for single thread - """ - ov_model.generate(**generate_kwargs) - stream_complete.set() - - t1 = Thread(target=generate_and_signal_complete) - t1.start() - - # Initialize an empty string to store the generated text - partial_text = "" - for new_text in streamer: - if not new_text: - continue - partial_text += new_text - conv.messages[-1][-1] = partial_text - history[-1][1] = partial_text - yield history - -.. code:: ipython3 - - if not Path("gradio_helper.py").exists(): - r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/llava-multimodal-chatbot/gradio_helper.py") - open("gradio_helper.py", "w").write(r.text) - - from gradio_helper import make_demo_llava - - demo = make_demo_llava(handle_user_message=handle_user_message, run_chatbot=run_chatbot, clear_history=clear_history) - - try: - demo.queue(max_size=2).launch(debug=False) - except Exception: - demo.queue(max_size=2).launch(share=True, debug=False) - # if you are launching remotely, specify server_name and server_port - # demo.launch(server_name='your server name', server_port='server port in int') - # Read more in the docs: https://gradio.app/docs/ - -.. 
code:: ipython3 - - # please uncomment and run this cell for stopping gradio interface - # demo.close() diff --git a/docs/notebooks/llava-multimodal-chatbot-with-output_files/llava-multimodal-chatbot-with-output_20_1.jpg b/docs/notebooks/llava-multimodal-chatbot-with-output_files/llava-multimodal-chatbot-with-output_20_1.jpg deleted file mode 100644 index 29fc338b516a09..00000000000000 --- a/docs/notebooks/llava-multimodal-chatbot-with-output_files/llava-multimodal-chatbot-with-output_20_1.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f825c10443339b42cb5e2415f48bb7bafb4e087fb29bce6d2feaf3c2f89788c8 -size 72374 diff --git a/docs/notebooks/llava-multimodal-chatbot-with-output_files/llava-multimodal-chatbot-with-output_20_1.png b/docs/notebooks/llava-multimodal-chatbot-with-output_files/llava-multimodal-chatbot-with-output_20_1.png deleted file mode 100644 index c1062ffb3d6d10..00000000000000 --- a/docs/notebooks/llava-multimodal-chatbot-with-output_files/llava-multimodal-chatbot-with-output_20_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dde262e54da6d8dad5062989d7863db7cd85ac0403b9015a76f5884472f67ceb -size 599941 diff --git a/docs/notebooks/llm-agent-functioncall-qwen-with-output.rst b/docs/notebooks/llm-agent-functioncall-qwen-with-output.rst index b903b7d5081d94..08ca24744bf216 100644 --- a/docs/notebooks/llm-agent-functioncall-qwen-with-output.rst +++ b/docs/notebooks/llm-agent-functioncall-qwen-with-output.rst @@ -397,7 +397,7 @@ For example, to register your own image generation tool: In this notebook, we will create 3 tools as examples: - **image_generation**: AI painting (image generation) service, input text - description, and return the image URL drawn based on text information. + description, and return the image URL drawn based on text information. - **get_current_weather**: Get the current weather in a given city name. - **wikipedia**: A wrapper around Wikipedia. Useful for when you need to answer general questions about people, places, companies, facts, diff --git a/docs/notebooks/llm-agent-react-with-output.rst b/docs/notebooks/llm-agent-react-with-output.rst new file mode 100644 index 00000000000000..791355276fd2fd --- /dev/null +++ b/docs/notebooks/llm-agent-react-with-output.rst @@ -0,0 +1,560 @@ +Create a native Agent with OpenVINO +=================================== + +LLMs are limited to the knowledge on which they have been trained and the +additional knowledge provided as context. As a result, if a useful piece +of information is missing from the provided knowledge, the model cannot “go +around” and try to find it in other sources. This is the reason why we +need to introduce the concept of Agents. + +The core idea of agents is to use a language model to choose a sequence +of actions to take. In agents, a language model is used as a reasoning +engine to determine which actions to take and in which order. Agents can +be seen as applications powered by LLMs and integrated with a set of +tools like search engines, databases, websites, and so on. Within an +agent, the LLM is the reasoning engine that, based on the user input, is +able to plan and execute a set of actions that are needed to fulfill the +request. + +.. figure:: https://github.com/openvinotoolkit/openvino_notebooks/assets/91237924/22fa5396-8381-400f-a78f-97e25d57d807 + :alt: agent + + agent + +This example will demonstrate how to create a native agent with +OpenVINO. 
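The rest of this example builds exactly this behavior as a ReAct-style reason/act loop: the LLM emits a thought and an action, the action is executed by a tool, and the observation is fed back until a final answer is produced. As a mental model only, a minimal sketch of such a loop is shown below; the ``generate``, ``parse_action`` and ``call_tool`` callables are hypothetical stand-ins for the helpers defined later in this notebook.

.. code:: python

    def react_loop(question, generate, parse_action, call_tool, max_steps=5):
        """Toy reason/act loop illustrating the idea, not the notebook implementation."""
        transcript = f"Question: {question}\n"
        for _ in range(max_steps):
            # The LLM continues the transcript until it emits an action or a final answer.
            step = generate(transcript)
            transcript += step
            if "Final Answer:" in step:
                return step.split("Final Answer:")[-1].strip()
            # Extract the requested tool and its arguments, run the tool,
            # then feed the result back to the model as an observation.
            tool_name, tool_args = parse_action(step)
            observation = call_tool(tool_name, tool_args)
            transcript += f"\nObservation: {observation}\nThought:"
        return transcript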
+ + +**Table of contents:** + + +- `Prerequisites <#prerequisites>`__ +- `Create LLM as agent <#create-llm-as-agent>`__ + + - `Download LLM <#download-llm>`__ + - `Select inference device for + LLM <#select-inference-device-for-llm>`__ + - `Instantiate LLM using Optimum + Intel <#instantiate-llm-using-optimum-intel>`__ + - `Create text generation method <#create-text-generation-method>`__ + +- `Create prompt template <#create-prompt-template>`__ +- `Create parser <#create-parser>`__ +- `Create tools calling <#create-tools-calling>`__ +- `Run agent <#run-agent>`__ + +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a self-contained example that relies solely on its own code. + +We recommend running the notebook in a virtual environment. You only +need a Jupyter server to start. For details, please refer to +`Installation +Guide `__. + +Prerequisites +------------- + + + +.. code:: ipython3 + + import os + import requests + + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/pip_helper.py", + ) + open("pip_helper.py", "w").write(r.text) + + os.environ["GIT_CLONE_PROTECTION_ACTIVE"] = "false" + + from pip_helper import pip_install + + pip_install( + "-q", + "--extra-index-url", + "https://download.pytorch.org/whl/cpu", + "transformers>=4.43.1", + ) + pip_install("-q", "git+https://github.com/huggingface/optimum-intel.git", "git+https://github.com/openvinotoolkit/nncf.git", "datasets", "accelerate") + pip_install("--pre", "-Uq", "openvino>=2024.4.0", "--extra-index-url", "https://storage.openvinotoolkit.org/simple/wheels/nightly") + +Create LLM as agent +------------------- + + + +Download LLM +~~~~~~~~~~~~ + + + +To run an LLM locally, we have to download the model in the first step. It +is possible to `export your +model `__ +to the OpenVINO IR format with the CLI, and load the model from a local +folder. + +Large Language Models (LLMs) are a core component of an agent. LlamaIndex +does not serve its own LLMs, but rather provides a standard interface +for interacting with many different LLMs. In this example, we can select +``Qwen2.5`` as the LLM in the agent pipeline. \* +**qwen2.5-3b-instruct/qwen2.5-7b-instruct/qwen2.5-14b-instruct** - +Qwen2.5 is the latest series of Qwen large language models. Compared +with Qwen2, the Qwen2.5 series brings significant improvements in coding, +mathematics and general knowledge skills. Additionally, it brings +long-context and multilingual support, including Chinese, English, +French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, +Vietnamese, Thai, Arabic, and more. For more details, please refer to +`model_card `__, +`blog `__, +`GitHub `__, and +`Documentation `__. + +.. code:: ipython3 + + import ipywidgets as widgets + + llm_model_ids = ["Qwen/Qwen2.5-3B-Instruct", "Qwen/Qwen2.5-7B-Instruct", "Qwen/qwen2.5-14b-instruct"] + + llm_model_id = widgets.Dropdown( + options=llm_model_ids, + value=llm_model_ids[0], + description="Model:", + disabled=False, + ) + + llm_model_id + + + + +.. parsed-literal:: + + Dropdown(description='Model:', options=('Qwen/Qwen2.5-3B-Instruct', 'Qwen/Qwen2.5-7B-Instruct', 'Qwen/qwen2.5-… + + + +.. 
code:: ipython3 + + from pathlib import Path + + llm_model_path = llm_model_id.value.split("/")[-1] + + if not Path(llm_model_path).exists(): + !optimum-cli export openvino --model {llm_model_id.value} --task text-generation-with-past --trust-remote-code --weight-format int4 --group-size 128 --ratio 1.0 --sym {llm_model_path} + +Select inference device for LLM +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + from notebook_utils import device_widget + + llm_device = device_widget("CPU", exclude=["NPU"]) + + llm_device + + +.. parsed-literal:: + + [ERROR] 20:00:52.380 [NPUBackends] Cannot find backend for inference. Make sure the device is available. + + + + +.. parsed-literal:: + + Dropdown(description='Device:', options=('CPU', 'GPU', 'AUTO'), value='CPU') + + + +Instantiate LLM using Optimum Intel +----------------------------------- + + + +Optimum Intel can be used to load optimized models from the `Hugging +Face Hub `__ and +create pipelines to run an inference with OpenVINO Runtime using Hugging +Face APIs. The Optimum Inference models are API compatible with Hugging +Face Transformers models. This means we just need to replace +``AutoModelForXxx`` class with the corresponding ``OVModelForXxx`` +class. + +Below is an example of the RedPajama model + +.. code:: diff + + -from transformers import AutoModelForCausalLM + +from optimum.intel.openvino import OVModelForCausalLM + from transformers import AutoTokenizer, pipeline + + model_id = "togethercomputer/RedPajama-INCITE-Chat-3B-v1" + -model = AutoModelForCausalLM.from_pretrained(model_id) + +model = OVModelForCausalLM.from_pretrained(model_id, export=True) + +Model class initialization starts with calling ``from_pretrained`` +method. When downloading and converting Transformers model, the +parameter ``export=True`` should be added (as we already converted model +before, we do not need to provide this parameter). We can save the +converted model for the next usage with the ``save_pretrained`` method. +Tokenizer class and pipelines API are compatible with Optimum models. + +You can find more details about OpenVINO LLM inference using HuggingFace +Optimum API in `LLM inference +guide `__. + +.. code:: ipython3 + + from optimum.intel.openvino import OVModelForCausalLM + from transformers import AutoTokenizer, AutoConfig, TextStreamer + from transformers.generation import ( + StoppingCriteriaList, + StoppingCriteria, + ) + import openvino.properties as props + import openvino.properties.hint as hints + import openvino.properties.streams as streams + + import json + import json5 + import torch + + tokenizer = AutoTokenizer.from_pretrained(llm_model_path, trust_remote_code=True) + + ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): "1", props.cache_dir(): ""} + + llm = OVModelForCausalLM.from_pretrained( + llm_model_path, + device=llm_device.value, + ov_config=ov_config, + config=AutoConfig.from_pretrained(llm_model_path, trust_remote_code=True), + trust_remote_code=True, + ) + + llm.generation_config.top_k = 1 + llm.generation_config.max_length = 2000 + +Create text generation method +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +In this example, we would like to stream the output text though +``TextStreamer``, and stop text generation before ``Observation`` +received from tool calling.. + +.. code:: ipython3 + + class StopSequenceCriteria(StoppingCriteria): + """ + This class can be used to stop generation whenever a sequence of tokens is encountered. 
+ + Args: + stop_sequences (`str` or `List[str]`): + The sequence (or list of sequences) on which to stop execution. + tokenizer: + The tokenizer used to decode the model outputs. + """ + + def __init__(self, stop_sequences, tokenizer): + if isinstance(stop_sequences, str): + stop_sequences = [stop_sequences] + self.stop_sequences = stop_sequences + self.tokenizer = tokenizer + + def __call__(self, input_ids, scores, **kwargs) -> bool: + decoded_output = self.tokenizer.decode(input_ids.tolist()[0]) + return any(decoded_output.endswith(stop_sequence) for stop_sequence in self.stop_sequences) + + + def text_completion(prompt: str, stop_words) -> str: + im_end = "<|im_end|>" + if im_end not in stop_words: + stop_words = stop_words + [im_end] + streamer = TextStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True) + + stopping_criteria = StoppingCriteriaList([StopSequenceCriteria(stop_words, tokenizer)]) + input_ids = torch.tensor([tokenizer.encode(prompt)]) + generate_kwargs = dict( + input_ids=input_ids, + streamer=streamer, + stopping_criteria=stopping_criteria, + ) + output = llm.generate(**generate_kwargs) + output = output.tolist()[0] + output = tokenizer.decode(output, errors="ignore") + assert output.startswith(prompt) + output = output[len(prompt) :].replace("<|endoftext|>", "").replace(im_end, "") + + for stop_str in stop_words: + idx = output.find(stop_str) + if idx != -1: + output = output[: idx + len(stop_str)] + return output + +Create prompt template +---------------------- + + + +A prompt for a language model is a set of instructions or input provided +by a user to guide the model’s response, helping it understand the +context and generate relevant and coherent language-based output, such +as answering questions, completing sentences, or engaging in a +conversation. + +Different agents have different prompting styles for reasoning. In this +example, we will use `ReAct agent `__ with +its typical prompt template. For a full list of built-in agents see +`agent +types `__. + +.. figure:: https://github.com/user-attachments/assets/c26432c2-3cf1-4942-ae03-fd8e8ebb4509 + :alt: react + + react + +A ReAct prompt consists of few-shot task-solving trajectories, with +human-written text reasoning traces and actions, as well as environment +observations in response to actions. ReAct prompting is intuitive and +flexible to design, and achieves state-of-the-art few-shot performances +across a variety of tasks, from question answering to online shopping! + +In an prompt template for agent, ``query`` is user’s query and other +parameter should be a sequence of messages that contains the +``descriptions`` and ``parameters`` of agent tool. + +.. code:: ipython3 + + TOOL_DESC = """{name_for_model}: Call this tool to interact with the {name_for_human} API. What is the {name_for_human} API useful for? {description_for_model} Parameters: {parameters}""" + + PROMPT_REACT = """Answer the following questions as best you can. You have access to the following APIs: + + {tools_text} + + Use the following format: + + Question: the input question you must answer + Thought: you should always think about what to do + Action: the action to take, should be one of [{tools_name_text}] + Action Input: the input to the action + Observation: the result of the action + ... (this Thought/Action/Action Input/Observation can be repeated zero or more times) + Thought: I now know the final answer + Final Answer: the final answer to the original input question + + Begin! 
+ + Question: {query}""" + +Meanwhile, we have to create a function to consolidate the tools +information and conversation history into the prompt template. + +.. code:: ipython3 + + def build_input_text(chat_history, list_of_tool_info) -> str: + tools_text = [] + for tool_info in list_of_tool_info: + tool = TOOL_DESC.format( + name_for_model=tool_info["name_for_model"], + name_for_human=tool_info["name_for_human"], + description_for_model=tool_info["description_for_model"], + parameters=json.dumps(tool_info["parameters"], ensure_ascii=False), + ) + if tool_info.get("args_format", "json") == "json": + tool += " Format the arguments as a JSON object." + elif tool_info["args_format"] == "code": + tool += " Enclose the code within triple backticks (`) at the beginning and end of the code." + else: + raise NotImplementedError + tools_text.append(tool) + tools_text = "\n\n".join(tools_text) + + tools_name_text = ", ".join([tool_info["name_for_model"] for tool_info in list_of_tool_info]) + + messages = [{"role": "system", "content": "You are a helpful assistant."}] + for i, (query, response) in enumerate(chat_history): + if list_of_tool_info: + if (len(chat_history) == 1) or (i == len(chat_history) - 2): + query = PROMPT_REACT.format( + tools_text=tools_text, + tools_name_text=tools_name_text, + query=query, + ) + if query: + messages.append({"role": "user", "content": query}) + if response: + messages.append({"role": "assistant", "content": response}) + + prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False, return_tensors="pt") + + return prompt + +Create parser +------------- + + + +A parser is used to convert the raw output of the LLM into the input arguments of +the tools. + +.. code:: ipython3 + + def parse_latest_tool_call(text): + tool_name, tool_args = "", "" + i = text.rfind("\nAction:") + j = text.rfind("\nAction Input:") + k = text.rfind("\nObservation:") + if 0 <= i < j: # If the text has `Action` and `Action Input`, + if k < j: # but does not contain `Observation`, + # then it is likely that `Observation` is omitted by the LLM, + # because the output text may have discarded the stop word. + text = text.rstrip() + "\nObservation:" # Add it back. + k = text.rfind("\nObservation:") + tool_name = text[i + len("\nAction:") : j].strip() + tool_args = text[j + len("\nAction Input:") : k].strip() + text = text[:k] + return tool_name, tool_args, text + +Create tools calling +-------------------- + + + +In this example, we will create 2 customized tools for +``image generation`` and ``weather query``. A detailed description of +these tools should be defined in JSON format, which will be used as part +of the prompt. + +.. code:: ipython3 + + tools = [ + { + "name_for_human": "get weather", + "name_for_model": "get_weather", + "description_for_model": 'Get the current weather in a given city name.', + "parameters": [ + { + "name": "city_name", + "description": "City name", + "required": True, + "schema": {"type": "string"}, + } + ], + }, + { + "name_for_human": "image generation", + "name_for_model": "image_gen", + "description_for_model": "AI painting (image generation) service, input text description, and return the image URL drawn based on text information.", + "parameters": [ + { + "name": "prompt", + "description": "describe the image", + "required": True, + "schema": {"type": "string"}, + } + ], + }, + ] + +Then we should implement these tools with inputs and outputs, and +execute them according to the output of the LLM. + +.. 
code:: ipython3 + + def call_tool(tool_name: str, tool_args: str) -> str: + if tool_name == "get_weather": + city_name = json5.loads(tool_args)["city_name"] + key_selection = { + "current_condition": [ + "temp_C", + "FeelsLikeC", + "humidity", + "weatherDesc", + "observation_time", + ], + } + resp = requests.get(f"https://wttr.in/{city_name}?format=j1") + resp.raise_for_status() + resp = resp.json() + ret = {k: {_v: resp[k][0][_v] for _v in v} for k, v in key_selection.items()} + return str(ret) + elif tool_name == "image_gen": + import urllib.parse + + tool_args = tool_args.replace("(", "").replace(")", "") + prompt = json5.loads(tool_args)["prompt"] + prompt = urllib.parse.quote(prompt) + return json.dumps( + {"image_url": f"https://image.pollinations.ai/prompt/{prompt}"}, + ensure_ascii=False, + ) + else: + raise NotImplementedError + + + def llm_with_tool(prompt: str, history, list_of_tool_info=()): + chat_history = [(x["user"], x["bot"]) for x in history] + [(prompt, "")] + + planning_prompt = build_input_text(chat_history, list_of_tool_info) + text = "" + while True: + output = text_completion(planning_prompt + text, stop_words=["Observation:", "Observation:\n"]) + action, action_input, output = parse_latest_tool_call(output) + if action: + observation = call_tool(action, action_input) + output += f"\nObservation: = {observation}\nThought:" + observation = f"{observation}\nThought:" + print(observation) + text += output + else: + text += output + break + + new_history = [] + new_history.extend(history) + new_history.append({"user": prompt, "bot": text}) + return text, new_history + +Run agent +--------- + + + +.. code:: ipython3 + + history = [] + query = "get the weather in London, and create a picture of Big Ben based on the weather information" + + response, history = llm_with_tool(prompt=query, history=history, list_of_tool_info=tools) + + +.. parsed-literal:: + + Thought: First, I need to use the get_weather API to get the current weather in London. + Action: get_weather + Action Input: {"city_name": "London"} + Observation: + {'current_condition': {'temp_C': '11', 'FeelsLikeC': '10', 'humidity': '94', 'weatherDesc': [{'value': 'Overcast'}], 'observation_time': '12:23 AM'}} + Thought: + Now that I have the weather information, I will use the image_gen API to generate an image of Big Ben based on the weather conditions. + Action: image_gen + Action Input: {"prompt": "Big Ben under overcast sky with temperature 11°C and humidity 94%"} + Observation: + {"image_url": "https://image.pollinations.ai/prompt/Big%20Ben%20under%20overcast%20sky%20with%20temperature%2011%C2%B0C%20and%20humidity%2094%25"} + Thought: + The image has been generated successfully. + Final Answer: The current weather in London is overcast with a temperature of 11°C and humidity of 94%. Based on this information, here is the image of Big Ben under an overcast sky: ![](https://image.pollinations.ai/prompt/Big%20Ben%20under%20overcast%20sky%20with%20temperature%2011%C2%B0C%20and%20humidity%2094%25) + diff --git a/docs/notebooks/llm-chatbot-generate-api-with-output.rst b/docs/notebooks/llm-chatbot-generate-api-with-output.rst index ac4c22ffc3b185..dab94c37d77a4c 100644 --- a/docs/notebooks/llm-chatbot-generate-api-with-output.rst +++ b/docs/notebooks/llm-chatbot-generate-api-with-output.rst @@ -97,15 +97,6 @@ Install required dependencies "transformers>=4.43.1" \ "onnx<=1.16.1; sys_platform=='win32'" "einops" "transformers_stream_generator" "tiktoken" "bitsandbytes" - -.. 
parsed-literal:: - - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - - .. code:: ipython3 import os @@ -199,8 +190,16 @@ several options for model weight compression:
+.. raw:: html + + + Click here to see available models options +.. raw:: html + + + - **tiny-llama-1b-chat** - This is the chat model finetuned on top of `TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T `__. The TinyLlama project aims to pretrain a 1.1B Llama model on 3 @@ -293,9 +292,10 @@ Click here to see available models options model can be found in `model card `__. >\ **Note**: run model with demo, you will need to accept license agreement. >You must - be a registered user in Hugging Face Hub. Please visit `HuggingFace - model card `__, carefully - read terms of usage and click accept button. You will need to use an + be a registered user in Hugging Face Hub. Please visit + `HuggingFace model + card `__, carefully read + terms of usage and click accept button. You will need to use an access token for the code below to run. For more information on access tokens, refer to `this section of the documentation `__. @@ -387,9 +387,10 @@ Click here to see available models options model can be found in `model card `__. >\ **Note**: run model with demo, you will need to accept license agreement. >You must - be a registered user in Hugging Face Hub. Please visit `HuggingFace - model card `__, carefully - read terms of usage and click accept button. You will need to use an + be a registered user in Hugging Face Hub. Please visit + `HuggingFace model + card `__, carefully read + terms of usage and click accept button. You will need to use an access token for the code below to run. For more information on access tokens, refer to `this section of the documentation `__. @@ -666,7 +667,7 @@ Click here to see available models options .. parsed-literal:: - Selected model qwen2.5-0.5b-instruct with INT4 compression + Selected model qwen2-0.5b-instruct with INT4 compression Convert model using Optimum-CLI tool @@ -674,8 +675,8 @@ Convert model using Optimum-CLI tool -`Optimum Intel `__ is -the interface between the +`Optimum Intel `__ +is the interface between the `Transformers `__ and `Diffusers `__ libraries and OpenVINO to accelerate end-to-end pipelines on Intel architectures. @@ -688,8 +689,16 @@ format.
+.. raw:: html + + + Click here to read more about Optimum CLI usage +.. raw:: html + + + The command bellow demonstrates basic command for model export with ``optimum-cli`` @@ -722,8 +731,16 @@ with the CLI.
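The same kind of weight compression can also be applied programmatically with NNCF instead of the CLI flags discussed below. The snippet is only a sketch: the model paths are placeholders and the parameter values are examples, not the settings used by this notebook.

.. code:: python

    import nncf
    import openvino as ov

    core = ov.Core()
    # Placeholder path to an already exported (FP16) OpenVINO IR model.
    model = core.read_model("model_fp16/openvino_model.xml")

    # 4-bit symmetric weight compression; `ratio` keeps part of the layers in INT8.
    compressed = nncf.compress_weights(
        model,
        mode=nncf.CompressWeightsMode.INT4_SYM,
        group_size=128,
        ratio=0.8,
    )
    ov.save_model(compressed, "model_int4/openvino_model.xml")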
+.. raw:: html + + + Click here to read more about weights compression with Optimum CLI +.. raw:: html + + + Setting ``--weight-format`` to respectively fp16, int8 or int4. This type of optimization allows to reduce the memory footprint and inference latency. By default the quantization scheme for int8/int4 will be @@ -766,47 +783,7 @@ be additionally applied during model export with INT4 precision using .. parsed-literal:: - ⌛ qwen2.5-0.5b-instruct conversion to INT4 started. It may takes some time. - - - -**Export command:** - - - -``optimum-cli export openvino --model Qwen/Qwen2.5-0.5B-Instruct --task text-generation-with-past --weight-format int4 --group-size 128 --ratio 1.0 --sym qwen2.5/INT4_compressed_weights`` - - -.. parsed-literal:: - - 2024-10-08 02:53:02.359208: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 02:53:02.392956: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 02:53:02.929372: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - Framework not specified. Using pt to export the model. - Using framework PyTorch: 2.2.2+cpu - Overriding 1 configuration item(s) - - use_cache -> True - We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/exporters/openvino/model_patcher.py:496: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if sequence_length != 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/qwen2/modeling_qwen2.py:165: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if seq_len > self.max_seq_len_cached: - Set tokenizer padding side to left for `text-generation-with-past` task. - - -.. 
parsed-literal:: - - INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 28% (1 / 169) │ 0% (0 / 168) │ - ├───────────────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ int4_sym │ 72% (168 / 169) │ 100% (168 / 168) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ - Applying Weight Compression ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% • 0:00:14 • 0:00:00 - ✅ INT4 qwen2.5-0.5b-instruct model converted and can be found in qwen2.5/INT4_compressed_weights + ✅ INT4 qwen2-0.5b-instruct model already converted and can be found in qwen2/INT4_compressed_weights Let’s compare model size for different compression types @@ -820,7 +797,7 @@ Let’s compare model size for different compression types .. parsed-literal:: - Size of model with INT4 compressed weights is 322.44 MB + Size of model with INT4 compressed weights is 358.86 MB Select device for inference @@ -891,10 +868,10 @@ of the available generation parameters more deeply later. .. parsed-literal:: - Loading model from qwen2.5/INT4_compressed_weights + Loading model from qwen2/INT4_compressed_weights Input text: The Sun is yellow bacause - of its coloration. The Sun is blue because + it is made of hydrogen and oxygen atoms. The Run Chatbot @@ -909,8 +886,16 @@ Now, when model created, we can setup Chatbot interface using
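Before looking at how the interface is wired up, it helps to see the chat loop that the pipeline implements. The sketch below is illustrative only: the model folder, device and parameters are placeholders, and the actual demo drives the same pipeline from Gradio callbacks instead of a console loop.

.. code:: python

    import openvino_genai as ov_genai

    # Placeholder IR folder and device; in the notebook these come from the earlier selections.
    pipe = ov_genai.LLMPipeline("qwen2.5/INT4_compressed_weights", "CPU")

    config = ov_genai.GenerationConfig()
    config.max_new_tokens = 256

    def streamer(subword):
        # Print tokens as they are produced; returning False lets generation continue.
        print(subword, end="", flush=True)
        return False

    pipe.start_chat()
    try:
        while True:
            prompt = input("\nYou: ")
            if not prompt:
                break
            pipe.generate(prompt, config, streamer)
            print()
    finally:
        pipe.finish_chat()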
+.. raw:: html + + + Click here to see how pipeline works +.. raw:: html + + + The diagram below illustrates how the chatbot pipeline works .. figure:: https://github.com/user-attachments/assets/9c9b56e1-01a6-48d8-aa46-222a88e25066 @@ -963,8 +948,16 @@ Advanced generation options
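The options listed in the collapsible section below can be tried out by setting them on a generation configuration before calling the pipeline. The values here are arbitrary examples, not recommendations from this notebook.

.. code:: python

    import openvino_genai as ov_genai

    config = ov_genai.GenerationConfig()
    config.do_sample = True          # sample instead of always taking the most likely token
    config.temperature = 0.7         # soften or sharpen the token probability distribution
    config.top_p = 0.9               # nucleus sampling: keep tokens within 90% cumulative probability
    config.top_k = 50                # consider only the 50 most likely tokens at each step
    config.repetition_penalty = 1.1  # penalize tokens that already appeared in the text
    config.max_new_tokens = 256

    # The filled config is then passed to generation, e.g. pipe.generate(prompt, config, streamer).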
+.. raw:: html + + + Click here to see detailed description of advanced options +.. raw:: html + + + | There are several parameters that can control text generation quality, \* ``Temperature`` is a parameter used to control the level of creativity in AI-generated text. By adjusting the ``temperature``, you @@ -1036,27 +1029,13 @@ Click here to see detailed description of advanced options demo = make_demo(pipe, model_configuration, model_id, lang.value) try: - demo.launch(debug=False) + demo.launch(debug=True) except Exception: - demo.launch(debug=False, share=True) + demo.launch(debug=True, share=True) # If you are launching remotely, specify server_name and server_port # EXAMPLE: `demo.launch(server_name='your server name', server_port='server port in int')` # To learn more please refer to the Gradio docs: https://gradio.app/docs/ - -.. parsed-literal:: - - Running on local URL: http://127.0.0.1:7860 - - To create a public link, set `share=True` in `launch()`. - - - - - - - - .. code:: ipython3 # please uncomment and run this cell for stopping gradio interface diff --git a/docs/notebooks/llm-chatbot-with-output.rst b/docs/notebooks/llm-chatbot-with-output.rst index cbd76c0544ba82..0d214f5cccc0fc 100644 --- a/docs/notebooks/llm-chatbot-with-output.rst +++ b/docs/notebooks/llm-chatbot-with-output.rst @@ -83,9 +83,9 @@ Install required dependencies .. code:: ipython3 import os - + os.environ["GIT_CLONE_PROTECTION_ACTIVE"] = "false" - + %pip install -Uq pip %pip uninstall -q -y optimum optimum-intel %pip install --pre -Uq "openvino>=2024.2.0" openvino-tokenizers[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly @@ -104,12 +104,12 @@ Install required dependencies from pathlib import Path import requests import shutil - + # fetch model configuration - + config_shared_path = Path("../../utils/llm_config.py") config_dst_path = Path("llm_config.py") - + if not config_dst_path.exists(): if config_shared_path.exists(): try: @@ -184,7 +184,7 @@ Click here to see available models options .. code:: python - ## login to huggingfacehub to get access to pretrained model + ## login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -217,7 +217,7 @@ Click here to see available models options .. code:: python - # login to huggingfacehub to get access to pretrained model + # login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -278,7 +278,7 @@ Click here to see available models options .. code:: python - ## login to huggingfacehub to get access to pretrained model + ## login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -311,7 +311,7 @@ Click here to see available models options .. code:: python - # login to huggingfacehub to get access to pretrained model + # login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -345,7 +345,7 @@ Click here to see available models options .. code:: python - ## login to huggingfacehub to get access to pretrained model + ## login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -379,7 +379,7 @@ Click here to see available models options .. 
code:: python - ## login to huggingfacehub to get access to pretrained model + ## login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -410,7 +410,7 @@ Click here to see available models options .. code:: python - ## login to huggingfacehub to get access to pretrained model + ## login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -545,14 +545,14 @@ Click here to see available models options .. code:: ipython3 model_languages = list(SUPPORTED_LLM_MODELS) - + model_language = widgets.Dropdown( options=model_languages, value=model_languages[0], description="Model Language:", disabled=False, ) - + model_language @@ -567,14 +567,14 @@ Click here to see available models options .. code:: ipython3 model_ids = list(SUPPORTED_LLM_MODELS[model_language.value]) - + model_id = widgets.Dropdown( options=model_ids, value=model_ids[0], description="Model:", disabled=False, ) - + model_id @@ -603,7 +603,7 @@ Convert model using Optimum-CLI tool `Optimum Intel `__ is -the interface between the +the interface between the `Transformers `__ and `Diffusers `__ libraries and OpenVINO to accelerate end-to-end pipelines on Intel architectures. @@ -655,12 +655,13 @@ to make it `symmetric `__ you can add ``--sym``. -For INT4 quantization you can also specify the following arguments : - -The ``--group-size`` parameter will define the group size to use for -quantization, -1 it will results in per-column quantization. - The -``--ratio`` parameter controls the ratio between 4-bit and 8-bit -quantization. If set to 0.9, it means that 90% of the layers will be -quantized to int4 while 10% will be quantized to int8. +For INT4 quantization you can also specify the following arguments : + +- The ``--group-size`` parameter will define the group size to use for + quantization, -1 it will results in per-column quantization. +- The ``--ratio`` parameter controls the ratio between 4-bit and 8-bit + quantization. If set to 0.9, it means that 90% of the layers will be + quantized to int4 while 10% will be quantized to int8. Smaller group_size and ratio values usually improve accuracy at the sacrifice of the model size and inference latency. @@ -671,7 +672,7 @@ sacrifice of the model size and inference latency. .. code:: ipython3 from IPython.display import Markdown, display - + prepare_int4_model = widgets.Checkbox( value=True, description="Prepare INT4 model", @@ -687,7 +688,7 @@ sacrifice of the model size and inference latency. description="Prepare FP16 model", disabled=False, ) - + display(prepare_int4_model) display(prepare_int8_model) display(prepare_fp16_model) @@ -756,14 +757,14 @@ We can now save floating point and compressed model variants .. code:: ipython3 from pathlib import Path - + pt_model_id = model_configuration["model_id"] pt_model_name = model_id.value.split("-")[0] fp16_model_dir = Path(model_id.value) / "FP16" int8_model_dir = Path(model_id.value) / "INT8_compressed_weights" int4_model_dir = Path(model_id.value) / "INT4_compressed_weights" - - + + def convert_to_fp16(): if (fp16_model_dir / "openvino_model.xml").exists(): return @@ -775,8 +776,8 @@ We can now save floating point and compressed model variants display(Markdown("**Export command:**")) display(Markdown(f"`{export_command}`")) ! 
$export_command - - + + def convert_to_int8(): if (int8_model_dir / "openvino_model.xml").exists(): return @@ -789,8 +790,8 @@ We can now save floating point and compressed model variants display(Markdown("**Export command:**")) display(Markdown(f"`{export_command}`")) ! $export_command - - + + def convert_to_int4(): compression_configs = { "zephyr-7b-beta": { @@ -865,7 +866,7 @@ We can now save floating point and compressed model variants "ratio": 0.8, }, } - + model_compression_params = compression_configs.get(model_id.value, compression_configs["default"]) if (int4_model_dir / "openvino_model.xml").exists(): return @@ -883,8 +884,8 @@ We can now save floating point and compressed model variants display(Markdown("**Export command:**")) display(Markdown(f"`{export_command}`")) ! $export_command - - + + if prepare_fp16_model.value: convert_to_fp16() if prepare_int8_model.value: @@ -899,7 +900,7 @@ Let’s compare model size for different compression types fp16_weights = fp16_model_dir / "openvino_model.bin" int8_weights = int8_model_dir / "openvino_model.bin" int4_weights = int4_model_dir / "openvino_model.bin" - + if fp16_weights.exists(): print(f"Size of FP16 model is {fp16_weights.stat().st_size / 1024 / 1024:.2f} MB") for precision, compressed_weights in zip([8, 4], [int8_weights, int4_weights]): @@ -925,16 +926,16 @@ Select device for inference and model variant .. code:: ipython3 import requests - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) open("notebook_utils.py", "w").write(r.text) - + from notebook_utils import device_widget - + device = device_widget("CPU", exclude=["NPU"]) - + device @@ -958,14 +959,14 @@ variant of model weights and inference device available_models.append("INT8") if fp16_model_dir.exists(): available_models.append("FP16") - + model_to_run = widgets.Dropdown( options=available_models, value=available_models[0], description="Model to run:", disabled=False, ) - + model_to_run @@ -1017,13 +1018,13 @@ guide `__ from transformers import AutoConfig, AutoTokenizer from optimum.intel.openvino import OVModelForCausalLM - + import openvino as ov import openvino.properties as props import openvino.properties.hint as hints import openvino.properties.streams as streams - - + + if model_to_run.value == "INT4": model_dir = int4_model_dir elif model_to_run.value == "INT8": @@ -1031,22 +1032,22 @@ guide `__ else: model_dir = fp16_model_dir print(f"Loading model from {model_dir}") - + ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): "1", props.cache_dir(): ""} - + if "GPU" in device.value and "qwen2-7b-instruct" in model_id.value: ov_config["GPU_ENABLE_SDPA_OPTIMIZATION"] = "NO" - + # On a GPU device a model is executed in FP16 precision. For red-pajama-3b-chat model there known accuracy # issues caused by this, which we avoid by setting precision hint to "f32". 
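    # "INFERENCE_PRECISION_HINT" overrides the device's default execution precision
    # (f16 on GPU); forcing "f32" runs the model in full precision, which is slower
    # but avoids the accuracy issue described above.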
core = ov.Core() - + if model_id.value == "red-pajama-3b-chat" and "GPU" in core.available_devices and device.value in ["GPU", "AUTO"]: ov_config["INFERENCE_PRECISION_HINT"] = "f32" - + model_name = model_configuration["model_id"] tok = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True) - + ov_model = OVModelForCausalLM.from_pretrained( model_dir, device=device.value, @@ -1120,14 +1121,14 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html :: - playing: 0.5 - sleeping: 0.25 - eating: 0.15 - driving: 0.05 - flying: 0.05 + playing: 0.5 + sleeping: 0.25 + eating: 0.15 + driving: 0.05 + flying: 0.05 - - **Low temperature** (e.g., 0.2): The AI model becomes more focused and deterministic, choosing tokens with the highest probability, such as "playing." - - **Medium temperature** (e.g., 1.0): The AI model maintains a balance between creativity and focus, selecting tokens based on their probabilities without significant bias, such as "playing," "sleeping," or "eating." + - **Low temperature** (e.g., 0.2): The AI model becomes more focused and deterministic, choosing tokens with the highest probability, such as "playing." + - **Medium temperature** (e.g., 1.0): The AI model maintains a balance between creativity and focus, selecting tokens based on their probabilities without significant bias, such as "playing," "sleeping," or "eating." - **High temperature** (e.g., 2.0): The AI model becomes more adventurous, increasing the chances of selecting less likely tokens, such as "driving" and "flying." - ``Top-p``, also known as nucleus sampling, is a parameter used to @@ -1165,7 +1166,7 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html import torch from threading import Event, Thread - + from typing import List, Tuple from transformers import ( AutoTokenizer, @@ -1173,8 +1174,8 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html StoppingCriteriaList, TextIteratorStreamer, ) - - + + model_name = model_configuration["model_id"] start_message = model_configuration["start_message"] history_template = model_configuration.get("history_template") @@ -1182,46 +1183,46 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html current_message_template = model_configuration.get("current_message_template") stop_tokens = model_configuration.get("stop_tokens") tokenizer_kwargs = model_configuration.get("tokenizer_kwargs", {}) - + max_new_tokens = 256 - - + + class StopOnTokens(StoppingCriteria): def __init__(self, token_ids): self.token_ids = token_ids - + def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool: for stop_id in self.token_ids: if input_ids[0][-1] == stop_id: return True return False - - + + if stop_tokens is not None: if isinstance(stop_tokens[0], str): stop_tokens = tok.convert_tokens_to_ids(stop_tokens) - + stop_tokens = [StopOnTokens(stop_tokens)] - - + + def default_partial_text_processor(partial_text: str, new_text: str): """ helper for updating partially generated answer, used by default - + Params: partial_text: text buffer for storing previosly generated text new_text: text update for the current step Returns: updated text string - + """ partial_text += new_text return partial_text - - + + text_processor = model_configuration.get("partial_text_processor", default_partial_text_processor) - - + + def convert_history_to_token(history: List[Tuple[str, str]]): """ function for conversion history stored as list pairs of user and assistant 
messages to tokens according to model expected conversation template @@ -1255,7 +1256,7 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html messages.append({"role": "user", "content": user_msg}) if model_msg: messages.append({"role": "assistant", "content": model_msg}) - + input_token = tok.apply_chat_template(messages, add_generation_prompt=True, tokenize=True, return_tensors="pt") else: text = start_message + "".join( @@ -1276,12 +1277,12 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html ) input_token = tok(text, return_tensors="pt", **tokenizer_kwargs).input_ids return input_token - - + + def bot(history, temperature, top_p, top_k, repetition_penalty, conversation_id): """ callback function for running chatbot on submit button click - + Params: history: conversation history temperature: parameter for control the level of creativity in AI-generated text. @@ -1290,9 +1291,9 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html top_k: parameter for control the range of tokens considered by the AI model based on their cumulative probability, selecting number of tokens with highest probability. repetition_penalty: parameter for penalizing tokens based on how frequently they occur in the text. conversation_id: unique conversation identifier. - + """ - + # Construct the input message string for the model by concatenating the current system message and conversation history # Tokenize the messages string input_ids = convert_history_to_token(history) @@ -1312,9 +1313,9 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html ) if stop_tokens is not None: generate_kwargs["stopping_criteria"] = StoppingCriteriaList(stop_tokens) - + stream_complete = Event() - + def generate_and_signal_complete(): """ genration function for single thread @@ -1322,18 +1323,18 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html global start_time ov_model.generate(**generate_kwargs) stream_complete.set() - + t1 = Thread(target=generate_and_signal_complete) t1.start() - + # Initialize an empty string to store the generated text partial_text = "" for new_text in streamer: partial_text = text_processor(partial_text, new_text) history[-1][1] = partial_text yield history - - + + def request_cancel(): ov_model.request.cancel() @@ -1342,11 +1343,11 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html if not Path("gradio_helper.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/llm-chatbot/gradio_helper.py") open("gradio_helper.py", "w").write(r.text) - + from gradio_helper import make_demo - + demo = make_demo(run_fn=bot, stop_fn=request_cancel, title=f"OpenVINO {model_id.value} Chatbot", language=model_language.value) - + try: demo.launch() except Exception: diff --git a/docs/notebooks/llm-rag-langchain-with-output.rst b/docs/notebooks/llm-rag-langchain-with-output.rst index 935c4c5ef1f205..1dec9cb2fb6659 100644 --- a/docs/notebooks/llm-rag-langchain-with-output.rst +++ b/docs/notebooks/llm-rag-langchain-with-output.rst @@ -127,7 +127,8 @@ Install required dependencies "onnx<1.16.2", "einops", "transformers_stream_generator", - "tiktoken" "transformers>=4.43.1", + "tiktoken", + "transformers>=4.43.1", "faiss-cpu", "sentence_transformers", "langchain>=0.2.0", diff --git a/docs/notebooks/magika-content-type-recognition-with-output.rst 
b/docs/notebooks/magika-content-type-recognition-with-output.rst index 2fbe7e63a8b21b..3ef21583fa5807 100644 --- a/docs/notebooks/magika-content-type-recognition-with-output.rst +++ b/docs/notebooks/magika-content-type-recognition-with-output.rst @@ -41,7 +41,6 @@ post `__ diff --git a/docs/notebooks/meter-reader-with-output.rst b/docs/notebooks/meter-reader-with-output.rst index fbb69d83fe239a..7f539abf025ebb 100644 --- a/docs/notebooks/meter-reader-with-output.rst +++ b/docs/notebooks/meter-reader-with-output.rst @@ -54,21 +54,13 @@ Guide =2023.1.0" opencv-python tqdm - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "openvino>=2023.1.0" opencv-python tqdm "matplotlib>=3.4" .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. Import @@ -645,7 +637,7 @@ bounds of input batch size. .. parsed-literal:: - + diff --git a/docs/notebooks/minicpm-v-multimodal-chatbot-with-output.rst b/docs/notebooks/minicpm-v-multimodal-chatbot-with-output.rst index 2c5cdf1aecf169..f6a22b6f160760 100644 --- a/docs/notebooks/minicpm-v-multimodal-chatbot-with-output.rst +++ b/docs/notebooks/minicpm-v-multimodal-chatbot-with-output.rst @@ -205,10 +205,10 @@ Let’s convert each model part. .. parsed-literal:: - 2024-10-08 02:54:38.009287: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 02:54:38.043246: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-23 01:47:25.606377: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-23 01:47:25.640217: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 02:54:38.562064: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-23 01:47:26.161344: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. code:: ipython3 @@ -233,49 +233,49 @@ Let’s convert each model part. .. parsed-literal:: - config.json: 0%| | 0.00/1.36k [00:00 self.max_seq_len_cached: @@ -548,11 +548,11 @@ documentation `__ - `Select model <#select-model>`__ @@ -64,9 +62,9 @@ Prerequisites .. code:: ipython3 from pathlib import Path - + repo_dir = Path("./ml-mobileclip") - + if not repo_dir.exists(): !git clone https://github.com/apple/ml-mobileclip.git @@ -74,27 +72,26 @@ Prerequisites .. parsed-literal:: Cloning into 'ml-mobileclip'... - remote: Enumerating objects: 84, done. - remote: Counting objects: 100% (84/84), done. - remote: Compressing objects: 100% (61/61), done. 
- remote: Total 84 (delta 29), reused 75 (delta 22), pack-reused 0 (from 0) - Unpacking objects: 100% (84/84), 467.39 KiB | 2.58 MiB/s, done. + remote: Enumerating objects: 95, done. + remote: Counting objects: 100% (95/95), done. + remote: Compressing objects: 100% (66/66), done. + remote: Total 95 (delta 38), reused 85 (delta 28), pack-reused 0 (from 0) + Unpacking objects: 100% (95/95), 469.11 KiB | 3.91 MiB/s, done. .. code:: ipython3 %pip install -q "./ml-mobileclip" --no-deps - + %pip install -q "clip-benchmark>=1.4.0" "datasets>=2.8.0" "open-clip-torch>=2.20.0" "timm>=0.9.5" "torch>=1.13.1" "torchvision>=0.14.1" --extra-index-url https://download.pytorch.org/whl/cpu - - %pip install -q "openvino>=2024.0.0" "gradio>=4.19" "matplotlib" "Pillow" "altair" "pandas" "opencv-python" "tqdm" + + %pip install -q "openvino>=2024.0.0" "gradio>=4.19" "matplotlib" "Pillow" "altair" "pandas" "opencv-python" "tqdm" "matplotlib>=3.4" .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.2.2+cpu which is incompatible. mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. @@ -144,9 +141,9 @@ comparison purposes, you can select different models among: .. code:: ipython3 import ipywidgets as widgets - + model_dir = Path("checkpoints") - + supported_models = { "MobileCLIP": { "mobileclip_s0": { @@ -215,8 +212,8 @@ comparison purposes, you can select different models among: }, }, } - - + + model_type = widgets.Dropdown(options=supported_models.keys(), default="MobileCLIP", description="Model type:") model_type @@ -232,13 +229,13 @@ comparison purposes, you can select different models among: .. code:: ipython3 available_models = supported_models[model_type.value] - + model_checkpoint = widgets.Dropdown( options=available_models.keys(), default=list(available_models), description="Model:", ) - + model_checkpoint @@ -253,15 +250,15 @@ comparison purposes, you can select different models among: .. 
code:: ipython3 import requests - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) - + open("notebook_utils.py", "w").write(r.text) - + from notebook_utils import download_file, device_widget - + model_config = available_models[model_checkpoint.value] Run model inference @@ -296,8 +293,8 @@ Prepare image gallery import matplotlib.pyplot as plt import numpy as np from PIL import Image - - + + def visualize_result(images: List, query: str = "", selected: List[int] = None): """ Utility function for visualization classification results @@ -325,8 +322,8 @@ Prepare image gallery mask = np.ones_like(np.array(images[idx])) a.imshow(mask, "jet", interpolation="none", alpha=0.75) return fig - - + + images_urls = [ "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/282ce53e-912d-41aa-ab48-2a001c022d74", "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/9bb40168-82b5-4b11-ada6-d8df104c736c", @@ -336,17 +333,17 @@ Prepare image gallery image_names = ["red_panda.png", "cat.png", "raccoon.png", "dog.png"] sample_path = Path("data") sample_path.mkdir(parents=True, exist_ok=True) - + images = [] for image_name, image_url in zip(image_names, images_urls): image_path = sample_path / image_name if not image_path.exists(): download_file(image_url, filename=image_name, directory=sample_path) images.append(Image.open(image_path).convert("RGB").resize((640, 420))) - + input_labels = ["cat"] text_descriptions = [f"This is a photo of a {label}" for label in input_labels] - + visualize_result(images, "image gallery"); @@ -393,7 +390,7 @@ preprocessing utilities from PIL import Image import mobileclip import open_clip - + # instantiate model model_name = model_config["model_name"] pretrained = model_config["pretrained"] @@ -408,6 +405,12 @@ preprocessing utilities tokenizer = open_clip.get_tokenizer(model_name) +.. parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers + warnings.warn(f"Importing from {__name__} is deprecated, please import via timm.layers", FutureWarning) + + .. parsed-literal:: @@ -423,8 +426,8 @@ Perform search image_tensor = torch.stack([preprocess(image) for image in images]) text = tokenizer(text_descriptions) - - + + with torch.no_grad(): # calculate image embeddings image_encoding_start = time.perf_counter() @@ -436,22 +439,22 @@ Perform search text_features = model.encode_text(text) text_encoding_end = time.perf_counter() print(f"Text encoding took {text_encoding_end - text_encoding_start:.3} ms") - + # normalize embeddings image_features /= image_features.norm(dim=-1, keepdim=True) text_features /= text_features.norm(dim=-1, keepdim=True) - + # calcualte similarity score image_probs = (100.0 * text_features @ image_features.T).softmax(dim=-1) selected_image = [torch.argmax(image_probs).item()] - + visualize_result(images, input_labels[0], selected_image); .. parsed-literal:: - Image encoding took 0.108 ms - Text encoding took 0.0118 ms + Image encoding took 0.136 ms + Text encoding took 0.0123 ms @@ -478,8 +481,8 @@ be used separately. Let’s convert each part to OpenVINO. 
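    # ov.convert_model traces each PyTorch module with the provided example_input and
    # returns an ov.Model; ov.save_model then serializes it to OpenVINO IR (.xml/.bin).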
import types import torch.nn.functional as F - - + + def se_block_forward(self, inputs): """Apply forward pass.""" b, c, h, w = inputs.size() @@ -495,12 +498,12 @@ be used separately. Let’s convert each part to OpenVINO. import openvino as ov import gc - + ov_models_dir = Path("ov_models") ov_models_dir.mkdir(exist_ok=True) - + image_encoder_path = ov_models_dir / f"{model_checkpoint.value}_im_encoder.xml" - + if not image_encoder_path.exists(): if "mobileclip_s" in model_name: model.image_encoder.model.conv_exp.se.forward = types.MethodType(se_block_forward, model.image_encoder.model.conv_exp.se) @@ -513,23 +516,23 @@ be used separately. Let’s convert each part to OpenVINO. ov.save_model(ov_image_encoder, image_encoder_path) del ov_image_encoder gc.collect() - + text_encoder_path = ov_models_dir / f"{model_checkpoint.value}_text_encoder.xml" - + if not text_encoder_path.exists(): model.forward = model.encode_text ov_text_encoder = ov.convert_model(model, example_input=text, input=[-1, text.shape[1]]) ov.save_model(ov_text_encoder, text_encoder_path) del ov_text_encoder gc.collect() - + del model gc.collect(); .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/mobileclip/modules/common/transformer.py:125: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/mobileclip/modules/common/transformer.py:125: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if seq_len != self.num_embeddings: @@ -546,9 +549,9 @@ Select device for image encoder .. code:: ipython3 core = ov.Core() - + device = device_widget() - + device @@ -605,17 +608,17 @@ Perform search print(f"Text encoding took {text_encoding_end - text_encoding_start:.3} ms") image_features /= image_features.norm(dim=-1, keepdim=True) text_features /= text_features.norm(dim=-1, keepdim=True) - + image_probs = (100.0 * text_features @ image_features.T).softmax(dim=-1) selected_image = [torch.argmax(image_probs).item()] - + visualize_result(images, input_labels[0], selected_image); .. parsed-literal:: - Image encoding took 0.0271 ms - Text encoding took 0.00495 ms + Image encoding took 0.0297 ms + Text encoding took 0.00513 ms @@ -651,14 +654,14 @@ models can require different optimal threshold for search. ) from open_clip.transform import image_transform from typing import Optional - - + + current_device = device.value current_model = image_encoder_path.name.split("_im_encoder")[0] - + available_converted_models = [model_file.name.split("_im_encoder")[0] for model_file in ov_models_dir.glob("*_im_encoder.xml")] available_devices = list(core.available_devices) + ["AUTO"] - + download_file( "https://storage.openvinotoolkit.org/data/test_data/videos/car-detection.mp4", directory=sample_path, @@ -668,8 +671,8 @@ models can require different optimal threshold for search. 
directory=sample_path, filename="coco.mp4", ) - - + + def get_preprocess_and_tokenizer(model_name): if "mobileclip" in model_name: resolution = supported_models["MobileCLIP"][model_name]["image_size"] @@ -690,10 +693,10 @@ models can require different optimal threshold for search. resize_size = model_configs[model_name]["image_size"] preprocess = image_transform((resize_size, resize_size), is_train=False, resize_mode="longest") tokenizer = open_clip.get_tokenizer(model_configs[model_name]["model_name"]) - + return preprocess, tokenizer - - + + def run( path: str, text_search: str, @@ -712,7 +715,7 @@ models can require different optimal threshold for search. global tokenizer global ov_compiled_image_encoder global ov_compiled_text_encoder - + if current_model != model_name or device != current_device: ov_compiled_image_encoder = core.compile_model(ov_models_dir / f"{model_name}_im_encoder.xml", device) ov_compiled_text_encoder = core.compile_model(ov_models_dir / f"{model_name}_text_encoder.xml", device) @@ -722,7 +725,7 @@ models can require different optimal threshold for search. # Load video dataset = LoadVideo(path, transforms=preprocess, vid_stride=stride) dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=0) - + # Get image query features if image_search: image = preprocess(image_search).unsqueeze(0) @@ -742,11 +745,11 @@ models can require different optimal threshold for search. for image, orig, frame, timestamp in dataloader: with torch.no_grad(): image_features = torch.from_numpy(ov_compiled_image_encoder(image)[0]) - + image_features /= image_features.norm(dim=-1, keepdim=True) probs = query_features.cpu().numpy() @ image_features.cpu().numpy().T probs = probs[0] - + # Save frame similarity values df = pd.DataFrame( { @@ -756,15 +759,15 @@ models can require different optimal threshold for search. } ) res = pd.concat([res, df]) - + # Check if frame is over threshold for i, p in enumerate(probs): if p > thresh: matches.append(to_pil_image(orig[i])) matches_probs.append(p) - + print(f"Frames: {frame.tolist()} - Probs: {probs}") - + # Create plot of similarity values lines = ( alt.Chart(res) @@ -775,16 +778,16 @@ models can require different optimal threshold for search. ) ).properties(width=600) rule = alt.Chart().mark_rule(strokeDash=[6, 3], size=2).encode(y=alt.datum(thresh)) - + selected_frames = np.argsort(-1 * np.array(matches_probs))[:20] matched_sorted_frames = [matches[idx] for idx in selected_frames] - + return ( lines + rule, matched_sorted_frames, ) # Only return up to 20 images to not crash the UI - - + + class LoadVideo(Dataset): def __init__(self, path, transforms, vid_stride=1): self.transforms = transforms @@ -792,27 +795,27 @@ models can require different optimal threshold for search. self.cur_frame = 0 self.cap = cv2.VideoCapture(path) self.total_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride) - + def __getitem__(self, _): # Read video # Skip over frames for _ in range(self.vid_stride): self.cap.grab() self.cur_frame += 1 - + # Read frame _, img = self.cap.retrieve() timestamp = self.cap.get(cv2.CAP_PROP_POS_MSEC) - + # Convert to PIL img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) img = Image.fromarray(np.uint8(img)) - + # Apply transforms img_t = self.transforms(img) - + return img_t, to_tensor(img), self.cur_frame, timestamp - + def __len__(self): return self.total_frames @@ -834,15 +837,15 @@ models can require different optimal threshold for search. 
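Before launching the demo, the thresholding step inside ``run`` above can be summarized with a small self-contained sketch: frames whose similarity to the query exceeds the threshold are kept and shown from most to least similar. The score values below are made up purely for illustration.

.. code:: ipython3

    import numpy as np

    # Hypothetical per-frame similarity scores between the query and five video frames
    frame_probs = np.array([0.12, 0.31, 0.05, 0.27, 0.44])
    thresh = 0.2

    # Keep frames above the threshold, ordered from most to least similar,
    # mirroring how the demo sorts its matched frames before display
    matched = [int(i) for i in np.argsort(-frame_probs) if frame_probs[i] > thresh]
    print(matched)  # [4, 1, 3]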
if not Path("gradio_helper.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/mobileclip-video-search/gradio_helper.py") open("gradio_helper.py", "w").write(r.text) - + from gradio_helper import make_demo, Option - + demo = make_demo( run=run, model_option=Option(choices=available_converted_models, value=model_checkpoint.value), device_option=Option(choices=available_devices, value=device.value), ) - + try: demo.launch(debug=False) except Exception: @@ -855,7 +858,7 @@ models can require different optimal threshold for search. .. parsed-literal:: Running on local URL: http://127.0.0.1:7860 - + To create a public link, set `share=True` in `launch()`. diff --git a/docs/notebooks/mobilevlm-language-assistant-with-output.rst b/docs/notebooks/mobilevlm-language-assistant-with-output.rst index 1ba06287ff485c..02efe16d9c0f4a 100644 --- a/docs/notebooks/mobilevlm-language-assistant-with-output.rst +++ b/docs/notebooks/mobilevlm-language-assistant-with-output.rst @@ -67,9 +67,7 @@ Install requirements Note: you may need to restart the kernel to use updated packages. ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.2.2+cpu which is incompatible. mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. - optimum-intel 1.20.0.dev0+542347b requires transformers<4.46,>=4.36, but you have transformers 4.33.3 which is incompatible. Note: you may need to restart the kernel to use updated packages. @@ -121,13 +119,13 @@ Import required packages .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( - 2024-10-08 03:04:16.549795: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 03:04:16.584461: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-23 01:57:03.532418: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-23 01:57:03.567584: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. 
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 03:04:17.090418: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + 2024-10-23 01:57:04.078609: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( @@ -344,15 +342,15 @@ compression instead of INT8 weight compression. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:595: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:595: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if input_shape[-1] > 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:119: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:119: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if seq_len > self.max_seq_len_cached: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:348: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:348: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:355: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:355: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attention_mask.size() != (bsz, 1, q_len, kv_seq_len): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:365: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:365: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim): @@ -372,13 +370,13 @@ compression instead of INT8 weight compression. .. 
parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 24% (43 / 169) │ 20% (42 / 168) │ - ├───────────────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ int4_asym │ 76% (126 / 169) │ 80% (126 / 168) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 24% (43 / 169) │ 20% (42 / 168) │ + ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ + │ 4 │ 76% (126 / 169) │ 80% (126 / 168) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -418,7 +416,7 @@ compression instead of INT8 weight compression. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:165: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:165: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) if a.grad is not None: @@ -438,13 +436,13 @@ compression instead of INT8 weight compression. .. 
parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 28% (44 / 170) │ 20% (42 / 168) │ - ├───────────────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ int4_asym │ 72% (126 / 170) │ 80% (126 / 168) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 28% (44 / 170) │ 20% (42 / 168) │ + ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ + │ 4 │ 72% (126 / 170) │ 80% (126 / 168) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ diff --git a/docs/notebooks/model-server-with-output.rst b/docs/notebooks/model-server-with-output.rst index dc6c9e966cf462..d5a9347a46e807 100644 --- a/docs/notebooks/model-server-with-output.rst +++ b/docs/notebooks/model-server-with-output.rst @@ -181,14 +181,7 @@ following rules: .. code:: ipython3 - import platform - - %pip install -q "openvino>=2023.1.0" opencv-python tqdm - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "openvino>=2024.4.0" opencv-python tqdm "matplotlib>=3.4" .. code:: ipython3 diff --git a/docs/notebooks/music-generation-with-output.rst b/docs/notebooks/music-generation-with-output.rst index d1fc70cca19a6d..566aa1c87a941c 100644 --- a/docs/notebooks/music-generation-with-output.rst +++ b/docs/notebooks/music-generation-with-output.rst @@ -124,13 +124,13 @@ Imports .. parsed-literal:: - 2024-10-08 03:06:32.000424: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 03:06:32.034271: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-23 01:59:20.725760: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-23 01:59:20.759494: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
- 2024-10-08 03:06:32.663477: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + 2024-10-23 01:59:21.367845: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( @@ -170,11 +170,11 @@ generate a text-conditioned music sample. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/huggingface_hub/file_download.py:1142: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/huggingface_hub/file_download.py:797: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`. warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm. warnings.warn("torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.") @@ -229,7 +229,7 @@ vocabulary. It helps the model understand the context of a sentence. @@ -655,7 +655,7 @@ We can now infer the pipeline backed by OpenVINO models. diff --git a/docs/notebooks/nano-llava-multimodal-chatbot-with-output.rst b/docs/notebooks/nano-llava-multimodal-chatbot-with-output.rst index 78700602513056..082b6613456e28 100644 --- a/docs/notebooks/nano-llava-multimodal-chatbot-with-output.rst +++ b/docs/notebooks/nano-llava-multimodal-chatbot-with-output.rst @@ -15,8 +15,8 @@ OpenVINO. Additionally, we will optimize model using - `Prerequisites <#prerequisites>`__ -- `Load PyTorch model <#load-pytorch-model>`__ -- `Run PyTorch Model Inference <#run-pytorch-model-inference>`__ +- `Select Model <#select-model>`__ +- `Download PyTorch model <#download-pytorch-model>`__ - `Convert and Optimize model <#convert-and-optimize-model>`__ - `Convert model to OpenVINO IR @@ -24,7 +24,6 @@ OpenVINO. Additionally, we will optimize model using - `Compress Model weights to 4 and 8 bits using NNCF <#compress-model-weights-to-4-and-8-bits-using-nncf>`__ - `Image Encoder <#image-encoder>`__ - - `Text Embeddings <#text-embeddings>`__ - `Language Model <#language-model>`__ - `Prepare model inference @@ -52,252 +51,551 @@ Prerequisites .. code:: ipython3 - %pip install -q "torch>=2.1" "transformers>=4.40" "accelerate" "pillow" "gradio>=4.26" "openvino>=2024.1.0" "tqdm" "nncf>=2.10" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "torch>=2.1" "transformers>=4.40" "accelerate" "pillow" "gradio>=4.26" "tqdm" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "nncf>=2.13" + %pip install -q -U --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly openvino_tokenizers[transformers] "openvino>=2024.4.0" + %pip install -q "git+https://github.com/eaidova/optimum-intel.git@ea/minicpmv" .. parsed-literal:: ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.2.2+cpu which is incompatible. mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. + openvino-dev 2024.4.0 requires openvino==2024.4.0, but you have openvino 2024.5.0.dev20241014 which is incompatible. + openvino-genai 2024.4.0.0 requires openvino_tokenizers~=2024.4.0.0.dev, but you have openvino-tokenizers 2024.5.0.0.dev20241022 which is incompatible. + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. .. 
code:: ipython3 - from huggingface_hub import snapshot_download from pathlib import Path - - model_local_dir = Path("nanoLLaVA") - - if not model_local_dir.exists(): - snapshot_download(repo_id="qnguyen3/nanoLLaVA", local_dir=model_local_dir) - - modeling_file = model_local_dir / "modeling_llava_qwen2.py" - orig_modeling_file = model_local_dir / f"orig_{modeling_file.name}" - - - # model code depends from flash_attn package that may be problematic to load. Patch model code for avoiding import of this package - if not orig_modeling_file.exists(): - modeling_file.rename(orig_modeling_file) - with orig_modeling_file.open("r") as f: - content = f.read() - replacement_lines = [ - ("from flash_attn import flash_attn_func, flash_attn_varlen_func", ""), - ("from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input", ""), - (' _flash_supports_window_size = "window_size" in list(inspect.signature(flash_attn_func).parameters)', "pass"), - ] - - for replace_pair in replacement_lines: - content = content.replace(*replace_pair) - - with modeling_file.open("w") as f: - f.write(content) - + import requests + helper_file = Path("ov_nano_llava_helper.py") -.. parsed-literal:: - - Fetching 14 files: 0%| | 0/14 [00:00`__ +- `nanoLLaVA-1.5 `__ +You can select one from the provided options below. -.. parsed-literal:: +.. code:: ipython3 - configuration_llava_qwen2.py: 0%| | 0.00/8.87k [00:00`__ +library. For convenience, we will use OpenVINO integration with +HuggingFace Optimum. `Optimum +Intel `__ is the +interface between the Transformers and Diffusers libraries and the +different tools and libraries provided by Intel to accelerate end-to-end +pipelines on Intel architectures. +Among other use cases, Optimum Intel provides a simple interface to +optimize your Transformers and Diffusers models, convert them to the +OpenVINO Intermediate Representation (IR) format and run inference using +OpenVINO Runtime. ``optimum-cli`` provides command line interface for +model conversion and optimization. -Load PyTorch model ------------------- +General command format: +.. code:: bash + optimum-cli export openvino --model --task -For creating PyTorch model we should use ``from_pretrained`` method of -``AutoModelForCausalLM`` model class. Model weights are already -downloaded from HuggingFace hub using ``snapshot_download`` function on -previous step. +where task is task to export the model for, if not specified, the task +will be auto-inferred based on the model. You can find a mapping between +tasks and model classes in Optimum TaskManager +`documentation `__. +Additionally, you can specify weights compression using +``--weight-format`` argument with one of following options: ``fp32``, +``fp16``, ``int8`` and ``int4``. Fro int8 and int4 +`nncf `__ will be used for +weight compression. More details about model export provided in `Optimum +Intel +documentation `__. .. code:: ipython3 - import transformers - from transformers import AutoModelForCausalLM, AutoTokenizer - from PIL import Image - import warnings - - transformers.logging.set_verbosity_error() - warnings.filterwarnings("ignore") - - model = AutoModelForCausalLM.from_pretrained(model_local_dir, trust_remote_code=True) - tokenizer = AutoTokenizer.from_pretrained(model_local_dir, trust_remote_code=True) + if not converted_model_exists(ov_model_dir): + !optimum-cli export openvino --model {model_id} --task image-text-to-text --trust-remote-code --weight-format fp16 {ov_model_dir} .. 
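The cell above exports the checkpoint with ``fp16`` weights. As an illustration of the ``--weight-format`` option described earlier, a 4-bit weight-compressed export of the same model could look like the command below; the output directory name is an assumption made for this note and is not used elsewhere in the notebook.

.. code:: ipython3

    # Hypothetical int4 export, shown only to illustrate --weight-format
    !optimum-cli export openvino --model qnguyen3/nanoLLaVA --task image-text-to-text --trust-remote-code --weight-format int4 nanoLLaVA-ov-int4

..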
parsed-literal:: - 2024-10-08 03:11:17.270186: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 03:11:17.304136: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-23 02:04:00.682228: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-23 02:04:00.715051: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 03:11:18.027701: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - -Run PyTorch Model Inference ---------------------------- - - - -.. code:: ipython3 - - import torch - import requests - - prompt = "Describe this image in detail" - - messages = [{"role": "user", "content": f"\n{prompt}"}] - text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) - - text_chunks = [tokenizer(chunk).input_ids for chunk in text.split("")] - input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0) - url = "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/8bf7d9f2-018a-4498-bec4-55f17c273ecc" - image = Image.open(requests.get(url, stream=True).raw) - image_tensor = model.process_images([image], model.config) - print(prompt) - image - - -.. 
parsed-literal:: - - Describe this image in detail - - + 2024-10-23 02:04:01.329449: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + Some weights of the model checkpoint at qnguyen3/nanoLLaVA were not used when initializing LlavaQwen2ForCausalLM: ['model.vision_tower.vision_tower.vision_model.embeddings.patch_embedding.bias', 'model.vision_tower.vision_tower.vision_model.embeddings.patch_embedding.weight', 'model.vision_tower.vision_tower.vision_model.embeddings.position_embedding.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias', 
'model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight', 
'model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias', 
'model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight', 
'model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias', 
'model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight', 
'model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.24.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.24.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.24.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.24.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.24.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.24.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.24.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.24.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.bias', 
'model.vision_tower.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.25.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.25.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.25.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.25.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.25.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.25.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.25.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.25.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.26.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.26.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.26.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.26.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.26.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.26.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.26.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.26.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight', 
'model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight', 
'model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight', 
'model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.head.attention.in_proj_bias', 'model.vision_tower.vision_tower.vision_model.head.attention.in_proj_weight', 'model.vision_tower.vision_tower.vision_model.head.attention.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.head.attention.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.head.layernorm.bias', 'model.vision_tower.vision_tower.vision_model.head.layernorm.weight', 'model.vision_tower.vision_tower.vision_model.head.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.head.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.head.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.head.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.head.probe', 'model.vision_tower.vision_tower.vision_model.post_layernorm.bias', 'model.vision_tower.vision_tower.vision_model.post_layernorm.weight'] + - This IS expected if you are initializing LlavaQwen2ForCausalLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). 
+ - This IS NOT expected if you are initializing LlavaQwen2ForCausalLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). + /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/qnguyen3/nanoLLaVA/1ea99cffcf50a27c5f06fe5d22a07046aba0bffe/modeling_llava_qwen2.py:169: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if attn_weights.size() != (batch_size, self.num_heads, q_len, k_v_seq_len): + /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/qnguyen3/nanoLLaVA/1ea99cffcf50a27c5f06fe5d22a07046aba0bffe/modeling_llava_qwen2.py:187: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if attn_output.size() != (batch_size, self.num_heads, q_len, self.head_dim): + Unexpectedly found already patched module model.embed_tokens while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.0.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.0.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.0.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.0.self_attn.o_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.0.mlp.gate_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.0.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.0.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.1.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. 
+ Unexpectedly found already patched module model.layers.1.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.1.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.1.self_attn.o_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.1.mlp.gate_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.1.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.1.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.2.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.2.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.2.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.2.self_attn.o_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.2.mlp.gate_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.2.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.2.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.3.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. 
+ Unexpectedly found already patched module model.layers.3.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.3.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.3.self_attn.o_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.3.mlp.gate_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.3.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.3.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.4.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.4.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.4.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.4.self_attn.o_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.4.mlp.gate_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.4.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.4.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.5.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. 
+ Unexpectedly found already patched module model.layers.5.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.5.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.5.self_attn.o_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.5.mlp.gate_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.5.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.5.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.6.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.6.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.6.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.6.self_attn.o_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.6.mlp.gate_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.6.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.6.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.7.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. 
+ Unexpectedly found already patched module model.layers.7.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.7.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.7.self_attn.o_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.7.mlp.gate_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.7.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.7.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.8.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.8.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.8.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.8.self_attn.o_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.8.mlp.gate_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.8.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.8.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.9.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. 
+ Unexpectedly found already patched module model.layers.9.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.9.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.9.self_attn.o_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.9.mlp.gate_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.9.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.9.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.10.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.10.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.10.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.10.self_attn.o_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.10.mlp.gate_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.10.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.10.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.11.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. 
+ Unexpectedly found already patched module model.layers.11.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.11.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.11.self_attn.o_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.11.mlp.gate_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.11.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.11.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.12.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.12.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.12.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.12.self_attn.o_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.12.mlp.gate_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.12.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.12.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.13.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. 
+ Unexpectedly found already patched module model.layers.13.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.13.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.13.self_attn.o_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.13.mlp.gate_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.13.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.13.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.14.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.14.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.14.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.14.self_attn.o_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.14.mlp.gate_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.14.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.14.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.15.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. 
+ Unexpectedly found already patched module model.layers.15.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.15.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.15.self_attn.o_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.15.mlp.gate_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.15.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.15.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.16.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.16.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.16.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.16.self_attn.o_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.16.mlp.gate_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.16.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.16.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.17.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. 
+ Unexpectedly found already patched module model.layers.17.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.17.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.17.self_attn.o_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.17.mlp.gate_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.17.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.17.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.18.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.18.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.18.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.18.self_attn.o_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.18.mlp.gate_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.18.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.18.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.19.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. 
+ Unexpectedly found already patched module model.layers.19.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.19.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.19.self_attn.o_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.19.mlp.gate_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.19.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.19.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.20.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.20.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.20.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.20.self_attn.o_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.20.mlp.gate_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.20.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.20.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.layers.21.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. 
+ Unexpectedly found already patched module model.layers.21.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model.
+ [... the same "Unexpectedly found already patched module ..." warning is emitted for the remaining self_attn and mlp projections of model.layers.21-23, for model.vision_tower.vision_tower.vision_model.embeddings.position_embedding, and for every self_attn and mlp submodule of model.vision_tower.vision_tower.vision_model.encoder.layers.0 through 24; the duplicated lines are omitted here ...]
+ Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.25.mlp.fc1 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.vision_tower.vision_tower.vision_model.encoder.layers.25.mlp.fc2 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.mm_projector.0 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module model.mm_projector.2 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + Unexpectedly found already patched module lm_head while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:447: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. + or len(self.key_cache[layer_idx]) == 0 # the layer has no cache + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:116: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ if (input_shape[-1] > 1 or self.sliding_window is not None) and self.is_causal: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/exporters/onnx/model_patcher.py:307: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if past_key_values_length > 0: + /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/qnguyen3/nanoLLaVA/1ea99cffcf50a27c5f06fe5d22a07046aba0bffe/modeling_llava_qwen2.py:939: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if seq_len > self.max_seq_len_cached: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:432: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. + elif len(self.key_cache[layer_idx]) == 0: # fills previously skipped layers; checking for tensor causes errors + /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/qnguyen3/nanoLLaVA/1ea99cffcf50a27c5f06fe5d22a07046aba0bffe/modeling_llava_qwen2.py:1499: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if attention_mask.size() != (bsz, 1, q_len, kv_seq_len): + Unexpectedly found already patched module while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + OpenVINO and OpenVINO Tokenizers versions are not binary compatible. + OpenVINO version: 2024.5.0-16993 + OpenVINO Tokenizers version: 2024.5.0.0 + First 3 numbers should be the same. Update OpenVINO Tokenizers to compatible version. It is recommended to use the same day builds for pre-release version. To install both OpenVINO and OpenVINO Tokenizers release version perform: + pip install --force-reinstall openvino openvino-tokenizers + To update both OpenVINO and OpenVINO Tokenizers to the latest pre-release version perform: + pip install --pre -U openvino openvino-tokenizers --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + Tokenizer won't be converted. 
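The warning block above shows why the tokenizer conversion is skipped: the installed ``openvino`` (2024.5.0-16993) and ``openvino-tokenizers`` (2024.5.0.0) wheels are not binary compatible, and the traceback that follows is the downstream failure when the tokenizers extension cannot be loaded. As a minimal, hedged sketch (not part of the notebook or of this diff), one way to catch such a mismatch before running the export is to compare the first three version components of both packages, as the message itself suggests:

.. code:: ipython3

    # Hypothetical pre-flight check: confirm that openvino and openvino-tokenizers
    # come from the same release ("first 3 numbers should be the same").
    from importlib.metadata import version

    ov_release = version("openvino").split(".")[:3]              # e.g. ['2024', '5', '0']
    tok_release = version("openvino-tokenizers").split(".")[:3]

    if ov_release != tok_release:
        print(
            f"Version mismatch: openvino {'.'.join(ov_release)} vs "
            f"openvino-tokenizers {'.'.join(tok_release)}. Reinstall matching builds, "
            "e.g. pip install --force-reinstall openvino openvino-tokenizers"
        )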
+ Traceback (most recent call last): + File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/bin/optimum-cli", line 10, in + sys.exit(main()) + File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/commands/optimum_cli.py", line 208, in main + service.run() + File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/commands/export/openvino.py", line 349, in run + main_export( + File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/exporters/openvino/__main__.py", line 416, in main_export + core = Core() + File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino_tokenizers/__init__.py", line 53, in new_core_init + self.add_extension(str(_ext_path)) # Core.add_extension doesn't support Path object + RuntimeError: Exception from src/inference/src/cpp/core.cpp:158: + Cannot add extension. Cannot find entry point to the extension library. This error happened: Cannot load library '/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino_tokenizers/lib/libopenvino_tokenizers.so': /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino_tokenizers/lib/libopenvino_tokenizers.so: undefined symbol: _ZNK2ov4Node17can_constant_foldERKSt6vectorINS_6OutputIS0_EESaIS3_EE -.. image:: nano-llava-multimodal-chatbot-with-output_files/nano-llava-multimodal-chatbot-with-output_7_1.png - - - -.. code:: ipython3 - - from transformers import TextStreamer - - streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True) - - output_ids = model.generate(input_ids, images=image_tensor, max_new_tokens=128, use_cache=True, streamer=streamer) - - -.. parsed-literal:: - - The image features a white, fluffy lamb, likely a lama, in the midst of a fire. The lamb's fluffy fur is a mix of white and black, and it has a unique pattern of black spots on its body. The lamb's eyes are a bright shade of blue, and its ears are also white. The lamb's mouth is open, revealing pink lips, adding a playful touch to its overall appearance. - The lamb's face is quite detailed, with features such as a small black eye, a small nose, and a black mouth. The lamb's face is also quite expressive, with its mouth open, revealing pink lips - - -Convert and Optimize model --------------------------- - - - -Our model conversion and optimization consist of following steps: 1. -Convert model to OpenVINO format and save it on disk. 2. Compress model -weights using NNCF - -Let’s consider each step more deeply. - -Convert model to OpenVINO IR format -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - - -Convert model to OpenVINO format using conversion helper function -defined bellow. We will use `OpenVINO Model Conversion -API `__ -for conversion PyTorch model to OpenVINO Intermediate Representation -format. 
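The removed paragraphs above and below describe the generic OpenVINO conversion flow the old notebook used. As a minimal, hedged sketch of that flow (a toy stand-in module, not the nanoLLaVA submodels, and not code taken from this diff):

.. code:: ipython3

    # Illustrative only: convert a toy PyTorch module with ov.convert_model, then
    # either compile it for immediate inference or save it as OpenVINO IR.
    import torch
    import openvino as ov

    toy_module = torch.nn.Linear(4, 2)
    ov_model = ov.convert_model(toy_module, example_input=torch.zeros(1, 4))

    core = ov.Core()
    compiled_model = core.compile_model(ov_model, "CPU")   # run inference right away
    ov.save_model(ov_model, "toy_model.xml")               # or keep the IR on disk for later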
``ov.convert_model`` function accepts PyTorch model instance and -example input for tracing and returns ready to use OpenVINO Model object -that can be compiled on device using ``core.compile_model`` or saved on -disk for next usage with help ``ov.save_model`` function. Depends from -generation step, model accepts different inputs and activates different -parts of pipeline. For preserving the same level of flexibility, we will -split model on parts: Image Encoder, Text Embeddings, Language Model and -convert each part separately. Compress Model weights to 4 and 8 bits using NNCF -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -337,22 +635,20 @@ prediction quality. More details about weights compression, can be found in `OpenVINO documentation `__. - **Note**: There is no speedup for INT4 compressed models on dGPU. - Please select below whether you would like to run INT4 weight compression instead of INT8 weight compression. .. code:: ipython3 import ipywidgets as widgets - + compression_mode = widgets.Dropdown( options=["INT4", "INT8"], value="INT4", description="Compression mode:", disabled=False, ) - + compression_mode @@ -366,101 +662,25 @@ compression instead of INT8 weight compression. .. code:: ipython3 - import gc - import warnings - import torch - import openvino as ov import nncf - from typing import Optional, Tuple - - warnings.filterwarnings("ignore") - - - def flattenize_inputs(inputs): - """ - Helper function for making nested inputs flattens - """ - flatten_inputs = [] - for input_data in inputs: - if input_data is None: - continue - if isinstance(input_data, (list, tuple)): - flatten_inputs.extend(flattenize_inputs(input_data)) - else: - flatten_inputs.append(input_data) - return flatten_inputs - - - def cleanup_torchscript_cache(): - """ - Helper for removing cached model representation - """ - torch._C._jit_clear_class_registry() - torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore() - torch.jit._state._clear_class_state() - - - def postprocess_converted_model( - ov_model, - example_input=None, - input_names=None, - output_names=None, - dynamic_shapes=None, - ): - """ - Helper function for appling postprocessing on converted model with updating input names, shapes and output names - acording to requested specification - """ - flatten_example_inputs = flattenize_inputs(example_input) if example_input else [] - - if input_names: - for inp_name, m_input, input_data in zip(input_names, ov_model.inputs, flatten_example_inputs): - input_node = m_input.get_node() - if input_node.element_type == ov.Type.dynamic: - m_input.get_node().set_element_type(ov.Type.f32) - shape = list(input_data.shape) - if dynamic_shapes is not None and inp_name in dynamic_shapes: - for k in dynamic_shapes[inp_name]: - shape[k] = -1 - input_node.set_partial_shape(ov.PartialShape(shape)) - m_input.get_tensor().set_names({inp_name}) - - if output_names: - for out, out_name in zip(ov_model.outputs, output_names): - out.get_tensor().set_names({out_name}) - ov_model.validate_nodes_and_infer_types() - return ov_model - - -.. parsed-literal:: - - INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino - + import openvino as ov -.. 
code:: ipython3 + core = ov.Core() if compression_mode.value == "INT4": - ov_out_path = Path("ov_nanollava/INT4_compressed_weights") + ov_compressed_model_dir = ov_model_dir.parent / "INT4" llava_wc_parameters = dict(mode=nncf.CompressWeightsMode.INT4_ASYM, group_size=128, ratio=0.8) else: - ov_out_path = Path("ov_nanollava/INT8_compressed_weights") + ov_compressed_model_dir = ov_model_dir.parent / "INT8" llava_wc_parameters = dict(mode=nncf.CompressWeightsMode.INT8) - + image_encoder_wc_parameters = dict(mode=nncf.CompressWeightsMode.INT8) - - ov_out_path.mkdir(exist_ok=True, parents=True) - model.config.save_pretrained(ov_out_path) - vision_tower = model.get_vision_tower() - if not vision_tower.is_loaded: - vision_tower.load_model() - - image_encoder_path = ov_out_path / "image_encoder.xml" - token_embedding_model_path = ov_out_path / "token_embed.xml" - model_path = ov_out_path / "llava_with_past.xml" - - model.eval() - model.config.use_cache = True - model.config.torchscript = True + + +.. parsed-literal:: + + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino + Image Encoder ~~~~~~~~~~~~~ @@ -469,60 +689,42 @@ Image Encoder Image Encoder is represented in nanoLLaVA by pretrained SigLIP model. Image encoder is responsible for encoding input images into embedding -space. +space. Code bellow demonstrates how to apply weights compression for +image encoder model. .. code:: ipython3 - if not image_encoder_path.exists(): - model.forward = model.encode_images - with torch.no_grad(): - ov_model = ov.convert_model( - model, - example_input=torch.zeros((1, 3, 384, 384)), - input=[(-1, 3, 384, 384)], - ) - if image_encoder_wc_parameters is not None: - print("Applying weight compression to image encoder") - ov_model = nncf.compress_weights(ov_model, **image_encoder_wc_parameters) - ov.save_model(ov_model, image_encoder_path) - cleanup_torchscript_cache() - del ov_model - gc.collect() - print("Image Encoder model successfully converted") - - -.. parsed-literal:: - - WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. - - -.. parsed-literal:: - - [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - - -.. parsed-literal:: + import gc - WARNING:nncf:NNCF provides best results with torch==2.4.*, while current torch version is 2.2.2+cpu. If you encounter issues, consider switching to torch==2.4.* + compressed_vision_encoder_path = ov_compressed_model_dir / "openvino_vision_embeddings_model.xml" + vision_encoder_path = ov_model_dir / "openvino_vision_embeddings_model.xml" + if not compressed_vision_encoder_path.exists(): + ov_vision_encoder = core.read_model(vision_encoder_path) + ov_compressed_vision_encoder = nncf.compress_weights(ov_vision_encoder, **image_encoder_wc_parameters) + ov.save_model(ov_compressed_vision_encoder, compressed_vision_encoder_path) + del ov_compressed_vision_encoder + del ov_vision_encoder + gc.collect(); .. parsed-literal:: - huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... 
- To disable this warning, you can either: - - Avoid using `tokenizers` before the fork if possible - - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/quantization/quantize_model.py:432: FutureWarning: `CompressWeightsMode.INT8` is deprecated. Please, use `CompressWeightsMode.INT8_ASYM` as value instead. + warning_deprecated( + 2024-10-23 02:04:33.280788: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-23 02:04:33.314985: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2024-10-23 02:04:33.946816: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: - Applying weight compression to image encoder INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 100% (159 / 159) │ 100% (159 / 159) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (159 / 159) │ 100% (159 / 159) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -538,39 +740,6 @@ space. -.. parsed-literal:: - - Image Encoder model successfully converted - - -Text Embeddings -~~~~~~~~~~~~~~~ - - - -In LLMs, input embedding is a part of language model, but for LLaVA the -first step hidden state produced by this model part should be integrated -with image embeddings into common embedding space. For ability to reuse -this model part and avoid introduction of extra llm model instance, we -will use it separately. - -.. code:: ipython3 - - if not token_embedding_model_path.exists(): - with torch.no_grad(): - ov_model = ov.convert_model(model.get_model().embed_tokens, example_input=torch.ones((1, 10), dtype=torch.long)) - ov.save_model(ov_model, token_embedding_model_path) - cleanup_torchscript_cache() - del ov_model - gc.collect() - print("Token Embedding model successfully converted") - - -.. parsed-literal:: - - Token Embedding model successfully converted - - Language Model ~~~~~~~~~~~~~~ @@ -579,71 +748,22 @@ Language Model Language Model is responsible for generation answer in LLaVA. This part is very similar to standard LLM for text generation. Our model uses `Qwen/Qwen1.5-0.5B `__ as base -LLM. 
To optimize the generation process and use memory more efficiently, -HuggingFace transformers API provides a mechanism for caching model -state externally using ``use_cache=True`` parameter and -``past_key_values`` argument in inputs and outputs. With the cache, the -model saves the hidden state once it has been computed. The model only -computes the one for the most recently generated output token at each -time step, re-using the saved ones for hidden tokens. This reduces the -generation complexity from :math:`O(n^3)` to :math:`O(n^2)` for a -transformer model. With this option, the model gets the previous step’s -hidden states (cached attention keys and values) as input and -additionally provides hidden states for the current step as output. It -means for all next iterations, it is enough to provide only a new token -obtained from the previous step and cached key values to get the next -token prediction. +LLM. .. code:: ipython3 - if not model_path.exists(): - model.forward = super(type(model), model).forward - example_input = {"attention_mask": torch.ones([2, 10], dtype=torch.int64), "position_ids": torch.tensor([[8, 9], [8, 9]], dtype=torch.int64)} - - dynamic_shapes = { - "input_embeds": {0: "batch_size", 1: "seq_len"}, - "attention_mask": {0: "batch_size", 1: "prev_seq_len + seq_len"}, - "position_ids": {0: "batch_size", 1: "seq_len"}, - } - input_embeds = torch.zeros((2, 2, model.config.hidden_size)) - - input_names = ["attention_mask", "position_ids"] - output_names = ["logits"] - - past_key_values = [] - for i in range(model.config.num_hidden_layers): - kv = [torch.randn([2, model.config.num_key_value_heads, 8, model.config.hidden_size // model.config.num_attention_heads]) for _ in range(2)] - past_key_values.append(kv) - input_names.extend([f"past_key_values.{i}.key", f"past_key_values.{i}.value"]) - output_names.extend([f"present.{i}.key", f"present.{i}.value"]) - dynamic_shapes[input_names[-2]] = {0: "batch_size", 2: "seq_len"} - dynamic_shapes[input_names[-1]] = {0: "batch_size", 2: "seq_len"} - - example_input["past_key_values"] = past_key_values - example_input["inputs_embeds"] = input_embeds - input_names.append("inputs_embeds") - dynamic_shapes["inputs_embeds"] = {0: "batch_size", 1: "seq_len"} - ov_model = ov.convert_model(model, example_input=example_input) - ov_model = postprocess_converted_model( - ov_model, example_input=example_input.values(), input_names=input_names, output_names=output_names, dynamic_shapes=dynamic_shapes - ) - - if llava_wc_parameters is not None: - print("Applying weight compression to second stage LLava model") - ov_model = nncf.compress_weights(ov_model, **llava_wc_parameters) - ov.save_model(ov_model, model_path) - cleanup_torchscript_cache() - del ov_model - gc.collect() - - print("LLaVA model successfully converted") - del model - gc.collect(); - + compressed_llm_path = ov_compressed_model_dir / "openvino_language_model.xml" + llm_path = ov_model_dir / "openvino_language_model.xml" -.. parsed-literal:: + if not compressed_llm_path.exists(): + ov_llm = core.read_model(llm_path) + ov_compressed_llm = nncf.compress_weights(ov_llm, **llava_wc_parameters) + ov.save_model(ov_compressed_llm, compressed_llm_path) + del ov_compressed_llm + del ov_llm + gc.collect() - Applying weight compression to second stage LLava model + copy_model_files(ov_model_dir, ov_compressed_model_dir) @@ -662,13 +782,13 @@ token prediction. .. 
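Before inspecting the bitwidth statistics reported below, it can be
useful to sanity-check the effect of compression on disk. The snippet
below is a small illustrative check that assumes the ``llm_path`` and
``compressed_llm_path`` variables defined in the cell above.

.. code:: ipython3

    from pathlib import Path


    def ir_size_mb(xml_path: Path) -> float:
        # An OpenVINO IR is a pair of files: the .xml topology and the .bin weights
        return (xml_path.stat().st_size + xml_path.with_suffix(".bin").stat().st_size) / 1024**2


    print(f"Original language model:   {ir_size_mb(llm_path):.1f} MB")
    print(f"Compressed language model: {ir_size_mb(compressed_llm_path):.1f} MB")

..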
parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 47% (48 / 169) │ 20% (47 / 168) │ - ├───────────────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ int4_asym │ 53% (121 / 169) │ 80% (121 / 168) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 47% (48 / 169) │ 20% (47 / 168) │ + ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ + │ 4 │ 53% (121 / 169) │ 80% (121 / 168) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -684,365 +804,24 @@ token prediction. -.. parsed-literal:: - - LLaVA model successfully converted - - Prepare model inference pipeline -------------------------------- -``OVLlavaQwen2ForCausalLM`` class provides ease-to-use interface for -using model in generation scenario. It is based on -``transformers.generation.GenerationMixin`` that gives us opportunity to -reuse all reach capabilities for generation implemented in HuggingFace -Transformers library. More details about this interface can be found in -`HuggingFace -documentation `__. - -.. code:: ipython3 - - from transformers.generation import GenerationConfig, GenerationMixin - from transformers.modeling_outputs import CausalLMOutputWithPast - from transformers import AutoConfig - from transformers.image_processing_utils import BatchFeature, get_size_dict - from transformers.image_transforms import ( - convert_to_rgb, - normalize, - rescale, - resize, - to_channel_dimension_format, - ) - from transformers.image_utils import ( - ChannelDimension, - PILImageResampling, - to_numpy_array, - ) - import numpy as np - import torch - from typing import Dict - from functools import partial, reduce - - IGNORE_INDEX = -100 - IMAGE_TOKEN_INDEX = -200 - - - class ImageProcessor: - def __init__( - self, - image_mean=(0.5, 0.5, 0.5), - image_std=(0.5, 0.5, 0.5), - size=(384, 384), - crop_size: Dict[str, int] = None, - resample=PILImageResampling.BICUBIC, - rescale_factor=1 / 255, - data_format=ChannelDimension.FIRST, - ): - crop_size = crop_size if crop_size is not None else {"height": 384, "width": 384} - crop_size = get_size_dict(crop_size, default_to_square=True, param_name="crop_size") - - self.image_mean = image_mean - self.image_std = image_std - self.size = size - self.resample = resample - self.rescale_factor = rescale_factor - self.data_format = data_format - self.crop_size = crop_size - - def preprocess(self, images, return_tensors): - if isinstance(images, Image.Image): - images = [images] - else: - assert isinstance(images, list) - - transforms = [ - convert_to_rgb, - to_numpy_array, - partial(resize, size=self.size, resample=self.resample, data_format=self.data_format), - partial(rescale, scale=self.rescale_factor, data_format=self.data_format), - partial(normalize, mean=self.image_mean, std=self.image_std, data_format=self.data_format), 
- partial(to_channel_dimension_format, channel_dim=self.data_format, input_channel_dim=self.data_format), - ] - - images = reduce(lambda x, f: [*map(f, x)], transforms, images) - data = {"pixel_values": images} - - return BatchFeature(data=data, tensor_type=return_tensors) - - - class OVLlavaQwen2ForCausalLM(GenerationMixin): - def __init__(self, core, model_dir, device): - self.image_encoder = core.compile_model(model_dir / "image_encoder.xml", device) - self.embed_tokens = core.compile_model(model_dir / "token_embed.xml", device) - self.model = core.read_model(model_dir / "llava_with_past.xml") - self.input_names = {key.get_any_name(): idx for idx, key in enumerate(self.model.inputs)} - self.output_names = {key.get_any_name(): idx for idx, key in enumerate(self.model.outputs)} - self.key_value_input_names = [key for key in self.input_names if "key_values" in key] - self.key_value_output_names = [key for key in self.output_names if "present" in key] - compiled_model = core.compile_model(self.model, device) - self.request = compiled_model.create_infer_request() - self.config = AutoConfig.from_pretrained(model_dir) - self.generation_config = GenerationConfig.from_model_config(self.config) - self.main_input_name = "input_ids" - self.device = torch.device("cpu") - self.num_pkv = 2 - self.image_processor = ImageProcessor() - self._supports_cache_class = False - - def can_generate(self): - """Returns True to validate the check that the model using `GenerationMixin.generate()` can indeed generate.""" - return True - - def __call__( - self, - input_ids: torch.LongTensor, - images: torch.Tensor, - attention_mask: Optional[torch.LongTensor] = None, - position_ids: Optional[torch.LongTensor] = None, - past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, - **kwargs, - ) -> CausalLMOutputWithPast: - return self.forward(input_ids, images, attention_mask, position_ids, past_key_values) - - def forward( - self, - input_ids: torch.LongTensor, - images: torch.Tensor, - attention_mask: Optional[torch.LongTensor] = None, - position_ids: Optional[torch.LongTensor] = None, - past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, - **kwargs, - ) -> CausalLMOutputWithPast: - """General inference method""" - inputs = self.prepare_inputs_for_multimodal(input_ids, position_ids, attention_mask, past_key_values, images) - - # Run inference - self.request.start_async(inputs, share_inputs=True) - self.request.wait() - - logits = torch.from_numpy(self.request.get_tensor("logits").data) - - # Tuple of length equal to : number of layer * number of past_key_value per decoder layer (2 corresponds to the self-attention layer) - past_key_values = tuple(self.request.get_tensor(key).data for key in self.key_value_output_names) - # Tuple of tuple of length `n_layers`, with each tuple of length equal to 2 (k/v of self-attention) - - past_key_values = tuple(past_key_values[i : i + self.num_pkv] for i in range(0, len(past_key_values), self.num_pkv)) - return CausalLMOutputWithPast(logits=logits, past_key_values=past_key_values) - - def prepare_inputs_for_multimodal(self, input_ids, position_ids, attention_mask, past_key_values, images): - inputs = {} - if past_key_values is None: - past_key_values = self._dummy_past_key_values(input_ids.shape[0]) - else: - past_key_values = tuple(past_key_value for pkv_per_layer in past_key_values for past_key_value in pkv_per_layer) - inputs.update(zip(self.key_value_input_names, past_key_values)) - - if images is None or input_ids.shape[1] == 1: - target_shape = 
past_key_values[-1][-1].shape[-2] + 1 if past_key_values is not None else input_ids.shape[1] - attention_mask = torch.cat( - ( - attention_mask, - torch.ones((attention_mask.shape[0], target_shape - attention_mask.shape[1]), dtype=attention_mask.dtype, device=attention_mask.device), - ), - dim=1, - ) - position_ids = torch.sum(attention_mask, dim=1).unsqueeze(-1) - 1 - inputs_embeds = self.embed_tokens(input_ids)[0] - inputs["attention_mask"] = attention_mask.numpy() - inputs["position_ids"] = position_ids.numpy() - inputs["inputs_embeds"] = inputs_embeds - - return inputs - - if type(images) is list or images.ndim == 5: - concat_images = torch.cat([image for image in images], dim=0) - image_features = self.encode_images(concat_images) - split_sizes = [image.shape[0] for image in images] - image_features = torch.split(image_features, split_sizes, dim=0) - image_features = [x.flatten(0, 1).to(self.device) for x in image_features] - else: - image_features = self.encode_images(images).to(self.device) - - # Let's just add dummy tensors if they do not exist, - # it is a headache to deal with None all the time. - # But it is not ideal, and if you have a better idea, - # please open an issue / submit a PR, thanks. - labels = None - _attention_mask = attention_mask - if attention_mask is None: - attention_mask = torch.ones_like(input_ids, dtype=torch.bool) - else: - attention_mask = attention_mask.bool() - if position_ids is None: - position_ids = torch.arange(0, input_ids.shape[1], dtype=torch.long, device=input_ids.device) - if labels is None: - labels = torch.full_like(input_ids, IGNORE_INDEX) - - # remove the padding using attention_mask -- TODO: double check - input_ids = [cur_input_ids[cur_attention_mask] for cur_input_ids, cur_attention_mask in zip(input_ids, attention_mask)] - labels = [cur_labels[cur_attention_mask] for cur_labels, cur_attention_mask in zip(labels, attention_mask)] - - new_input_embeds = [] - new_labels = [] - cur_image_idx = 0 - for batch_idx, cur_input_ids in enumerate(input_ids): - num_images = (cur_input_ids == IMAGE_TOKEN_INDEX).sum() - if num_images == 0: - cur_image_features = image_features[cur_image_idx] - cur_input_embeds_1 = self.embed_tokens(cur_input_ids) - cur_input_embeds = torch.cat([cur_input_embeds_1, cur_image_features[0:0]], dim=0) - new_input_embeds.append(cur_input_embeds) - new_labels.append(labels[batch_idx]) - cur_image_idx += 1 - continue - - image_token_indices = [-1] + torch.where(cur_input_ids == IMAGE_TOKEN_INDEX)[0].tolist() + [cur_input_ids.shape[0]] - cur_input_ids_noim = [] - cur_labels = labels[batch_idx] - cur_labels_noim = [] - for i in range(len(image_token_indices) - 1): - cur_input_ids_noim.append(cur_input_ids[image_token_indices[i] + 1 : image_token_indices[i + 1]]) - cur_labels_noim.append(cur_labels[image_token_indices[i] + 1 : image_token_indices[i + 1]]) - split_sizes = [x.shape[0] for x in cur_labels_noim] - cur_input_embeds = torch.from_numpy(self.embed_tokens(torch.cat(cur_input_ids_noim).unsqueeze(0))[0])[0] - cur_input_embeds_no_im = torch.split(cur_input_embeds, split_sizes, dim=0) - cur_new_input_embeds = [] - cur_new_labels = [] - - for i in range(num_images + 1): - cur_new_input_embeds.append(cur_input_embeds_no_im[i]) - cur_new_labels.append(cur_labels_noim[i]) - if i < num_images: - cur_image_features = image_features[cur_image_idx] - cur_image_idx += 1 - cur_new_input_embeds.append(cur_image_features) - cur_new_labels.append(torch.full((cur_image_features.shape[0],), IGNORE_INDEX, device=cur_labels.device, 
dtype=cur_labels.dtype)) - - cur_new_input_embeds = torch.cat(cur_new_input_embeds) - cur_new_labels = torch.cat(cur_new_labels) - - new_input_embeds.append(cur_new_input_embeds) - new_labels.append(cur_new_labels) - - # Truncate sequences to max length as image embeddings can make the sequence longer - tokenizer_model_max_length = getattr(self.config, "tokenizer_model_max_length", None) - if tokenizer_model_max_length is not None: - new_input_embeds = [x[:tokenizer_model_max_length] for x in new_input_embeds] - new_labels = [x[:tokenizer_model_max_length] for x in new_labels] - - # Combine them - max_len = max(x.shape[0] for x in new_input_embeds) - batch_size = len(new_input_embeds) - - new_input_embeds_padded = [] - new_labels_padded = torch.full((batch_size, max_len), IGNORE_INDEX, dtype=new_labels[0].dtype, device=new_labels[0].device) - attention_mask = torch.zeros((batch_size, max_len), dtype=attention_mask.dtype, device=attention_mask.device) - position_ids = torch.zeros((batch_size, max_len), dtype=position_ids.dtype, device=position_ids.device) - - for i, (cur_new_embed, cur_new_labels) in enumerate(zip(new_input_embeds, new_labels)): - cur_len = cur_new_embed.shape[0] - if getattr(self.config, "tokenizer_padding_side", "right") == "left": - new_input_embeds_padded.append( - torch.cat( - (torch.zeros((max_len - cur_len, cur_new_embed.shape[1]), dtype=cur_new_embed.dtype, device=cur_new_embed.device), cur_new_embed), dim=0 - ) - ) - if cur_len > 0: - new_labels_padded[i, -cur_len:] = cur_new_labels - attention_mask[i, -cur_len:] = True - position_ids[i, -cur_len:] = torch.arange(0, cur_len, dtype=position_ids.dtype, device=position_ids.device) - else: - new_input_embeds_padded.append( - torch.cat( - (cur_new_embed, torch.zeros((max_len - cur_len, cur_new_embed.shape[1]), dtype=cur_new_embed.dtype, device=cur_new_embed.device)), dim=0 - ) - ) - if cur_len > 0: - new_labels_padded[i, :cur_len] = cur_new_labels - attention_mask[i, :cur_len] = True - position_ids[i, :cur_len] = torch.arange(0, cur_len, dtype=position_ids.dtype, device=position_ids.device) - - new_input_embeds = torch.stack(new_input_embeds_padded, dim=0) - attention_mask = attention_mask.to(dtype=_attention_mask.dtype) - inputs["inputs_embeds"] = new_input_embeds.numpy() - inputs["attention_mask"] = attention_mask.numpy() - inputs["position_ids"] = position_ids.numpy() - - return inputs - - def prepare_inputs_for_generation(self, input_ids, past_key_values=None, **kwargs): - """ - This function is used during running GenerationMixin.generate for preparing model specific inputs for - each generation step - """ - past_len = 0 - if past_key_values is not None: - input_ids = input_ids[:, -1].unsqueeze(-1) - past_len = past_key_values[-1][-1].shape[-2] - attention_mask = kwargs.get( - "attention_mask", - torch.ones(input_ids.shape[0], input_ids.shape[1] + past_len), - ) - return { - "input_ids": input_ids, - "attention_mask": attention_mask, - "position_ids": kwargs.get("position_ids", None), - "past_key_values": past_key_values, - "images": kwargs.get("images", None), - } - - def _reorder_cache(self, past_key_values: Tuple[Tuple[torch.Tensor]], beam_idx: torch.Tensor) -> Tuple[Tuple[torch.Tensor]]: - """ - This function is used to re-order the `past_key_values` cache if [`~PreTrainedModel.beam_search`] or - [`~PreTrainedModel.beam_sample`] is called. - This is required to match `past_key_values` with the correct beam_idx at every generation step. 
- """ - - # from transformers.models.gpt2.modeling_gpt2.GPT2LMHeadModel._reorder_cache - return tuple(tuple(np.take(past_state, beam_idx, 0) for past_state in layer_past) for layer_past in past_key_values) - - def _dummy_past_key_values(self, batch_size): - pkv = [] - for input_name in self.key_value_input_names: - input_t = self.model.input(input_name) - input_shape = self.model.input(input_name).get_partial_shape() - input_shape[0] = batch_size - input_shape[2] = 0 - pkv.append(ov.Tensor(input_t.get_element_type(), input_shape.get_shape())) - - return pkv - - def encode_images(self, images): - return torch.from_numpy(self.image_encoder(images)[0]) - - def expand2square(self, pil_img, background_color): - width, height = pil_img.size - if width == height: - return pil_img - elif width > height: - result = Image.new(pil_img.mode, (width, width), background_color) - result.paste(pil_img, (0, (width - height) // 2)) - return result - else: - result = Image.new(pil_img.mode, (height, height), background_color) - result.paste(pil_img, ((height - width) // 2, 0)) - return result - - def process_images(self, images, model_cfg): - image_aspect_ratio = getattr(model_cfg, "image_aspect_ratio", None) - new_images = [] - if image_aspect_ratio == "pad": - for image in images: - image = self.expand2square(image, tuple(int(x * 255) for x in self.image_processor.image_mean)) - image = self.image_processor.preprocess(image, return_tensors="pt")["pixel_values"][0] - new_images.append(image) - else: - return self.image_processor(images, return_tensors="pt")["pixel_values"] - if all(x.shape == new_images[0].shape for x in new_images): - new_images = torch.stack(new_images, dim=0) - return new_images +OpenVINO integration with Optimum Intel provides ready-to-use API for +model inference that can be used for smooth integration with +transformers-based solutions. For loading pixtral model, we will use +``OVModelForVisualCausalLM`` class that have compatible interface with +Transformers Pixtral implementation. For loading a model, +``from_pretrained`` method should be used. It accepts path to the model +directory or model_id from HuggingFace hub (if model is not converted to +OpenVINO format, conversion will be triggered automatically). +Additionally, we can provide an inference device, quantization config +(if model has not been quantized yet) and device-specific OpenVINO +Runtime configuration. More details about model inference with Optimum +Intel can be found in +`documentation `__. Run OpenVINO Model Inference ---------------------------- @@ -1057,16 +836,16 @@ Select device .. code:: ipython3 import requests - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) open("notebook_utils.py", "w").write(r.text) - + from notebook_utils import device_widget - + device = device_widget("CPU", exclude=["NPU"]) - + device @@ -1078,22 +857,66 @@ Select device +Optimum Intel provides Transformers-like interface for inference +OpenVINO models that allows smooth integration into user application, +where you need just replace model class, other parts of pipeline - +preprocessing and postprocessing code remains the same. It means that we +can use the same tokenizer and image processor that provided with model. + .. 
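For reference, the cell below illustrates where each of the optional
``from_pretrained`` arguments mentioned above goes. It is purely
illustrative: the model id, compression settings and runtime hint are
assumptions, not the call used in this notebook (the actual loading
cell follows and reuses the already compressed local directory, so it
needs no quantization config).

.. code:: ipython3

    from optimum.intel import OVModelForVisualCausalLM, OVWeightQuantizationConfig

    # Hypothetical example: all argument values below are placeholders
    model = OVModelForVisualCausalLM.from_pretrained(
        "qnguyen3/nanoLLaVA",  # model_id on the HuggingFace hub or a local directory
        export=True,  # convert to OpenVINO format on the fly if needed
        quantization_config=OVWeightQuantizationConfig(bits=4),  # weight compression during export
        device="CPU",  # inference device
        ov_config={"PERFORMANCE_HINT": "LATENCY"},  # device-specific runtime options
        trust_remote_code=True,
    )

..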
code:: ipython3 - core = ov.Core() - - ov_model = OVLlavaQwen2ForCausalLM(core, ov_out_path, device.value) + from optimum.intel.openvino import OVModelForVisualCausalLM + from transformers import AutoConfig, AutoTokenizer, AutoProcessor, TextStreamer + + # prepare tokenizer + tokenizer = AutoTokenizer.from_pretrained(ov_compressed_model_dir, trust_remote_code=True) + + # prepare image processor + config = AutoConfig.from_pretrained(ov_compressed_model_dir, trust_remote_code=True) + processor = AutoProcessor.from_pretrained(config.mm_vision_tower) + + # initialize OpenVINO model inference class + ov_model = OVModelForVisualCausalLM.from_pretrained(ov_compressed_model_dir, device=device.value, trust_remote_code=True) .. code:: ipython3 + from ov_nano_llava_helper import process_images, process_text_input + from PIL import Image + + prompt = "Describe this image in detail" + + messages = [{"role": "user", "content": f"\n{prompt}"}] + text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) + url = "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/8bf7d9f2-018a-4498-bec4-55f17c273ecc" + image = Image.open(requests.get(url, stream=True).raw) + image_tensor = process_images(image, None, processor) + input_ids, attention_mask = process_text_input(text, tokenizer) + streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True) - - output_ids = ov_model.generate(input_ids, images=image_tensor, max_new_tokens=128, use_cache=True, streamer=streamer) + + display(image) + print(f"Question:\n{prompt}") + print("Answer:") + + output_ids = ov_model.generate(input_ids, attention_mask=attention_mask, images=image_tensor, max_new_tokens=128, use_cache=True, streamer=streamer) + + + +.. image:: nano-llava-multimodal-chatbot-with-output_files/nano-llava-multimodal-chatbot-with-output_22_0.png .. parsed-literal:: - The image features a white, fluffy lamb with a playful, cheerful expression. The lamb is positioned in the center of the image, and it appears to be in motion, as if it's running. The lamb's face is white and it has a cute, adorable expression. It has a pair of bright, black eyes that are wide open, and it has a small, pink nose. The lamb's ears are also white and are quite large. The lamb's legs are white and are positioned behind it. The lamb's tail is also white and is quite long. The lamb's body is fluffy and it covers a large portion of the + Setting `pad_token_id` to `eos_token_id`:None for open-end generation. + + +.. parsed-literal:: + + Question: + Describe this image in detail + Answer: + The image portrays a charming and playful scene featuring a white lama. This adorable creature has a playful expression, with its eyes sparkling with joy and its nose in a playful smile. It is adorned with cute, tiny eyes that add a playful touch to its face. The lama's ears are also quite noticeable, with one of them sporting a tiny pink button. The lama's body is covered in fluffy, white fur, and it has its hind legs visible, adding a sense of movement to the image. + The lama is surrounded by a vivid display of fire. The flames are bright and lively, with some areas appearing more intense Interactive demo @@ -1103,11 +926,11 @@ Interactive demo .. 
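The demo below streams the answer token by token: ``generate`` runs in
a background thread while a ``TextIteratorStreamer`` yields decoded
text chunks to the UI. A minimal sketch of that pattern, assuming the
model, tokenizer and inputs prepared in the previous cells, is shown
here; the full demo cell adds chat-history handling and a keyword
stopping criterion on top of it.

.. code:: ipython3

    from threading import Thread

    from transformers import TextIteratorStreamer

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        input_ids=input_ids,
        attention_mask=attention_mask,
        images=image_tensor,
        streamer=streamer,
        max_new_tokens=64,
    )

    # generate() runs in the background; the main thread consumes decoded text
    thread = Thread(target=ov_model.generate, kwargs=generation_kwargs)
    thread.start()
    for text_chunk in streamer:
        print(text_chunk, end="", flush=True)
    thread.join()

..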
code:: ipython3 - import time from transformers import TextIteratorStreamer, StoppingCriteria from threading import Thread - - + import torch + + class KeywordsStoppingCriteria(StoppingCriteria): def __init__(self, keywords, tokenizer, input_ids): self.keywords = keywords @@ -1122,7 +945,7 @@ Interactive demo self.keyword_ids.append(torch.tensor(cur_keyword_ids)) self.tokenizer = tokenizer self.start_len = input_ids.shape[1] - + def call_for_batch(self, output_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool: offset = min(output_ids.shape[1] - self.start_len, self.max_keyword_len) self.keyword_ids = [keyword_id.to(output_ids.device) for keyword_id in self.keyword_ids] @@ -1135,14 +958,14 @@ Interactive demo if keyword in outputs: return True return False - + def __call__(self, output_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool: outputs = [] for i in range(output_ids.shape[0]): outputs.append(self.call_for_batch(output_ids[i].unsqueeze(0), scores)) return all(outputs) - - + + def bot_streaming(message, history): messages = [] if message["files"]: @@ -1151,7 +974,7 @@ Interactive demo for _, hist in enumerate(history): if isinstance(hist[0], tuple): image = hist[0][0] - + if len(history) > 0 and image is not None: messages.append({"role": "user", "content": f"\n{history[1][0]}"}) messages.append({"role": "assistant", "content": history[1][1]}) @@ -1172,29 +995,32 @@ Interactive demo messages.append({"role": "user", "content": f"\n{message['text']}"}) elif len(history) == 0 and image is None: messages.append({"role": "user", "content": message["text"]}) - + print(messages) image = Image.open(image).convert("RGB") text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) - text_chunks = [tokenizer(chunk).input_ids for chunk in text.split("")] - input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0) + image_tensor = process_images(image, None, processor) + input_ids, attention_mask = process_text_input(text, tokenizer) stop_str = "<|im_end|>" keywords = [stop_str] stopping_criteria = KeywordsStoppingCriteria(keywords, tokenizer, input_ids) streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True) - - image_tensor = ov_model.process_images([image], ov_model.config) generation_kwargs = dict( - input_ids=input_ids, images=image_tensor, streamer=streamer, max_new_tokens=128, stopping_criteria=[stopping_criteria], temperature=0.01 + input_ids=input_ids, + attention_mask=attention_mask, + images=image_tensor, + streamer=streamer, + max_new_tokens=128, + stopping_criteria=[stopping_criteria], + temperature=0.01, ) thread = Thread(target=ov_model.generate, kwargs=generation_kwargs) thread.start() - + buffer = "" for new_text in streamer: buffer += new_text generated_text_without_prompt = buffer[:] - time.sleep(0.04) yield generated_text_without_prompt .. code:: ipython3 @@ -1202,11 +1028,11 @@ Interactive demo if not Path("gradio_helper.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/nano-llava-multimodal-chatbot/gradio_helper.py") open("gradio_helper.py", "w").write(r.text) - + from gradio_helper import make_demo - + demo = make_demo(fn=bot_streaming) - + try: demo.launch(debug=False) except Exception: @@ -1219,17 +1045,12 @@ Interactive demo .. parsed-literal:: Running on local URL: http://127.0.0.1:7860 - - To create a public link, set `share=True` in `launch()`. 
- + To create a public link, set `share=True` in `launch()`. -.. code:: ipython3 - # please uncomment and run this cell for stopping gradio interface - # demo.close() diff --git a/docs/notebooks/nano-llava-multimodal-chatbot-with-output_files/nano-llava-multimodal-chatbot-with-output_7_1.jpg b/docs/notebooks/nano-llava-multimodal-chatbot-with-output_files/nano-llava-multimodal-chatbot-with-output_22_0.jpg similarity index 100% rename from docs/notebooks/nano-llava-multimodal-chatbot-with-output_files/nano-llava-multimodal-chatbot-with-output_7_1.jpg rename to docs/notebooks/nano-llava-multimodal-chatbot-with-output_files/nano-llava-multimodal-chatbot-with-output_22_0.jpg diff --git a/docs/notebooks/nano-llava-multimodal-chatbot-with-output_files/nano-llava-multimodal-chatbot-with-output_7_1.png b/docs/notebooks/nano-llava-multimodal-chatbot-with-output_files/nano-llava-multimodal-chatbot-with-output_22_0.png similarity index 100% rename from docs/notebooks/nano-llava-multimodal-chatbot-with-output_files/nano-llava-multimodal-chatbot-with-output_7_1.png rename to docs/notebooks/nano-llava-multimodal-chatbot-with-output_files/nano-llava-multimodal-chatbot-with-output_22_0.png diff --git a/docs/notebooks/object-detection-with-output.rst b/docs/notebooks/object-detection-with-output.rst index 06636ec17bf7e9..b1db0093158982 100644 --- a/docs/notebooks/object-detection-with-output.rst +++ b/docs/notebooks/object-detection-with-output.rst @@ -81,7 +81,6 @@ Install requirements Note: you may need to restart the kernel to use updated packages. ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. magika 0.5.1 requires numpy<2.0,>=1.24; python_version >= "3.8" and python_version < "3.9", but you have numpy 1.23.5 which is incompatible. - mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.2.2+cpu which is incompatible. mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. supervision 0.24.0 requires numpy<1.23.3,>=1.21.2; python_full_version <= "3.10.0", but you have numpy 1.23.5 which is incompatible. Note: you may need to restart the kernel to use updated packages. diff --git a/docs/notebooks/object-detection-with-output_files/object-detection-with-output_19_0.png b/docs/notebooks/object-detection-with-output_files/object-detection-with-output_19_0.png index 67d21a28902ef3..fb2b3b2abdf69f 100644 --- a/docs/notebooks/object-detection-with-output_files/object-detection-with-output_19_0.png +++ b/docs/notebooks/object-detection-with-output_files/object-detection-with-output_19_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bae0d18e0fe38cee76b5220d145c96e7d6d38b087cba46fc71db126eb3a88bb8 -size 175079 +oid sha256:1d300cd81ce6f44a5ee2ded6f14b739b0a8ecd8d5d2e487743cd085b92b3533b +size 175076 diff --git a/docs/notebooks/oneformer-segmentation-with-output.rst b/docs/notebooks/oneformer-segmentation-with-output.rst index cc94c7dbff9047..f0a9e5703e1644 100644 --- a/docs/notebooks/oneformer-segmentation-with-output.rst +++ b/docs/notebooks/oneformer-segmentation-with-output.rst @@ -63,14 +63,7 @@ Install required libraries .. 
code:: ipython3 - import platform - - %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "transformers>=4.26.0" "openvino>=2023.1.0" "nncf>=2.7.0" "gradio>=4.19" "torch>=2.1" scipy ipywidgets Pillow tqdm - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "transformers>=4.26.0" "openvino>=2023.1.0" "nncf>=2.7.0" "gradio>=4.19" "torch>=2.1" "matplotlib>=3.4" scipy ipywidgets Pillow tqdm Prepare the environment ----------------------- diff --git a/docs/notebooks/openvino-api-with-output.rst b/docs/notebooks/openvino-api-with-output.rst index 789fdeb53b1a45..a837b88ac6976b 100644 --- a/docs/notebooks/openvino-api-with-output.rst +++ b/docs/notebooks/openvino-api-with-output.rst @@ -201,7 +201,7 @@ notebooks. .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') @@ -250,7 +250,7 @@ points to the filename of an ONNX model. .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/segmentation.onnx') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/segmentation.onnx') @@ -310,7 +310,7 @@ without any conversion step. Pass the filename with extension to .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/inference.pdiparams') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/inference.pdiparams') @@ -354,7 +354,7 @@ TensorFlow models saved in frozen graph format can also be passed to .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.pb') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.pb') @@ -407,7 +407,7 @@ It is pre-trained model optimized to work with TensorFlow Lite. .. parsed-literal:: - Warning: Looks like you're using an outdated `kagglehub` version, please consider updating (latest version: 0.3.1) + Warning: Looks like you're using an outdated `kagglehub` version, please consider updating (latest version: 0.3.3) .. code:: ipython3 @@ -497,7 +497,7 @@ Information about the inputs and outputs of the model are in .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') @@ -703,7 +703,7 @@ produced data as values. .. 
parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') @@ -892,7 +892,7 @@ input shape. .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/segmentation.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/segmentation.bin') @@ -1044,7 +1044,7 @@ the cache. .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') @@ -1092,5 +1092,5 @@ measure the time it takes now. .. parsed-literal:: - Loading the network to the AUTO device took 0.07 seconds. + Loading the network to the AUTO device took 0.08 seconds. diff --git a/docs/notebooks/openvoice-with-output.rst b/docs/notebooks/openvoice-with-output.rst index 4e4a53150f7504..b73dd8059aa65b 100644 --- a/docs/notebooks/openvoice-with-output.rst +++ b/docs/notebooks/openvoice-with-output.rst @@ -99,13 +99,10 @@ Clone repository and install requirements Cloning into 'OpenVoice'... remote: Enumerating objects: 438, done. - remote: Counting objects: 100% (238/238), done. - remote: Compressing objects: 100% (113/113), done. - remote: Total 438 (delta 178), reused 127 (delta 125), pack-reused 200 (from 1) - Receiving objects: 100% (438/438), 3.82 MiB | 16.31 MiB/s, done. - Resolving deltas: 100% (221/221), done. + remote: Total 438 (delta 0), reused 0 (delta 0), pack-reused 438 (from 1) + Receiving objects: 100% (438/438), 3.84 MiB | 19.30 MiB/s, done. + Resolving deltas: 100% (207/207), done. ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.4.1 which is incompatible. mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. torchvision 0.17.2+cpu requires torch==2.2.2, but you have torch 2.4.1 which is incompatible. Note: you may need to restart the kernel to use updated packages. @@ -245,9 +242,9 @@ True .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. 
WeightNorm.apply(module, name, dim) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/api.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/api.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. checkpoint_dict = torch.load(ckpt_path, map_location=torch.device(self.device)) @@ -261,9 +258,9 @@ True .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/wavmark/__init__.py:16: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/wavmark/__init__.py:16: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. checkpoint = torch.load(resume_path, map_location=torch.device('cpu')) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/api.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/api.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. 
checkpoint_dict = torch.load(ckpt_path, map_location=torch.device(self.device)) @@ -413,50 +410,50 @@ documentation 0 No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:283: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:283: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert ( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:346: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:346: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! pad_length = max(length - (self.window_size + 1), 0) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:347: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:347: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! slice_start_position = max((self.window_size + 1) - length, 0) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:349: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:349: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if pad_length > 0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:114: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:114: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if torch.min(inputs) < left or torch.max(inputs) > right: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:119: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:119: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if min_bin_width * num_bins > 1.0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:121: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:121: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if min_bin_height * num_bins > 1.0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:171: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:171: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert (discriminant >= 0).all() - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Trace had nondeterministic nodes. Did you forget call .eval() on your model? Nodes: - %3293 : Float(1, 2, 43, strides=[86, 43, 1], requires_grad=0, device=cpu) = aten::randn(%3288, %3289, %3290, %3291, %3292) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:85:0 - %5559 : Float(1, 192, 151, strides=[28992, 1, 192], requires_grad=0, device=cpu) = aten::randn_like(%m_p, %5554, %5555, %5556, %5557, %5558) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:85:0 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Trace had nondeterministic nodes. Did you forget call .eval() on your model? Nodes: + %3293 : Float(1, 2, 43, strides=[86, 43, 1], requires_grad=0, device=cpu) = aten::randn(%3288, %3289, %3290, %3291, %3292) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86:0 + %5559 : Float(1, 192, 152, strides=[29184, 1, 192], requires_grad=0, device=cpu) = aten::randn_like(%m_p, %5554, %5555, %5556, %5557, %5558) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86:0 This may cause errors in trace checking. To disable trace checking, pass check_trace=False to torch.jit.trace() _check_trace( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: - The values for attribute 'shape' do not match: torch.Size([1, 1, 38912]) != torch.Size([1, 1, 39424]). + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: + The values for attribute 'shape' do not match: torch.Size([1, 1, 38656]) != torch.Size([1, 1, 39680]). 
_check_trace( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 2. of the traced function does not match the corresponding output of the Python function. Detailed error: - The values for attribute 'shape' do not match: torch.Size([1, 1, 152, 43]) != torch.Size([1, 1, 154, 43]). + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 2. of the traced function does not match the corresponding output of the Python function. Detailed error: + The values for attribute 'shape' do not match: torch.Size([1, 1, 151, 43]) != torch.Size([1, 1, 155, 43]). _check_trace( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 3. of the traced function does not match the corresponding output of the Python function. Detailed error: - The values for attribute 'shape' do not match: torch.Size([1, 1, 152]) != torch.Size([1, 1, 154]). + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 3. of the traced function does not match the corresponding output of the Python function. Detailed error: + The values for attribute 'shape' do not match: torch.Size([1, 1, 151]) != torch.Size([1, 1, 155]). _check_trace( - 2024-10-08 03:16:11.904034: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-23 02:08:53.401718: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 100% (199 / 199) │ 100% (199 / 199) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (199 / 199) │ 100% (199 / 199) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -479,27 +476,27 @@ documentation )`. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:836.) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/modules/module.py:1562: UserWarning: A window was not provided. A rectangular window will be applied,which is known to cause spectral leakage. Other windows such as torch.hann_window or torch.hamming_window can are recommended to reduce spectral leakage.To suppress this warning and use a rectangular window, explicitly set `window=torch.ones(n_fft, device=)`. 
(Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:836.) return forward_call(\*args, \*\*kwargs) @@ -716,7 +713,7 @@ Load speaker embeddings .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/functional.py:666: UserWarning: stft with return_complex=False is deprecated. In a future pytorch release, stft will return complex tensors for all inputs, and return_complex=False will raise an error. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/functional.py:666: UserWarning: stft with return_complex=False is deprecated. In a future pytorch release, stft will return complex tensors for all inputs, and return_complex=False will raise an error. Note: you can still call torch.view_as_real on the complex output to recover the old return format. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:873.) return _VF.stft(input, n_fft, hop_length, win_length, window, # type: ignore[attr-defined] @@ -871,7 +868,7 @@ And finally, run voice tone conversion with OpenVINO optimized model @@ -889,7 +886,7 @@ And finally, run voice tone conversion with OpenVINO optimized model @@ -1078,7 +1075,7 @@ voice tone conversion online. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/gradio/components/dropdown.py:100: UserWarning: The `max_choices` parameter is ignored when `multiselect` is False. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/gradio/components/dropdown.py:100: UserWarning: The `max_choices` parameter is ignored when `multiselect` is False. warnings.warn( diff --git a/docs/notebooks/optical-character-recognition-with-output.rst b/docs/notebooks/optical-character-recognition-with-output.rst index 59f72ff2fd84c3..9efa8e8e7b87f2 100644 --- a/docs/notebooks/optical-character-recognition-with-output.rst +++ b/docs/notebooks/optical-character-recognition-with-output.rst @@ -61,25 +61,16 @@ Guide =2024.0.0" "onnx<1.16.2" torch torchvision pillow opencv-python --extra-index-url https://download.pytorch.org/whl/cpu - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "openvino-dev>=2024.0.0" "onnx<1.16.2" torch torchvision pillow opencv-python "matplotlib>=3.4" --extra-index-url https://download.pytorch.org/whl/cpu .. parsed-literal:: ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.2.2+cpu which is incompatible. mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. torchaudio 2.4.1+cpu requires torch==2.4.1, but you have torch 2.2.2+cpu which is incompatible. Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. Imports @@ -381,12 +372,12 @@ Converting text-recognition-resnet-fc… .. 
parsed-literal:: ========== Converting text-recognition-resnet-fc to ONNX - Conversion to ONNX command: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/omz_tools/internal_scripts/pytorch_to_onnx.py --model-path=/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/omz_tools/models/public/text-recognition-resnet-fc --model-path=model/public/text-recognition-resnet-fc --model-name=get_model --import-module=model '--model-param=file_config=r"model/public/text-recognition-resnet-fc/vedastr/configs/resnet_fc.py"' '--model-param=weights=r"model/public/text-recognition-resnet-fc/vedastr/ckpt/resnet_fc.pth"' --input-shape=1,1,32,100 --input-names=input --output-names=output --output-file=model/public/text-recognition-resnet-fc/resnet_fc.onnx + Conversion to ONNX command: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/omz_tools/internal_scripts/pytorch_to_onnx.py --model-path=/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/omz_tools/models/public/text-recognition-resnet-fc --model-path=model/public/text-recognition-resnet-fc --model-name=get_model --import-module=model '--model-param=file_config=r"model/public/text-recognition-resnet-fc/vedastr/configs/resnet_fc.py"' '--model-param=weights=r"model/public/text-recognition-resnet-fc/vedastr/ckpt/resnet_fc.pth"' --input-shape=1,1,32,100 --input-names=input --output-names=output --output-file=model/public/text-recognition-resnet-fc/resnet_fc.onnx ONNX check passed successfully. 
========== Converting text-recognition-resnet-fc to IR (FP16) - Conversion command: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/bin/mo --framework=onnx --output_dir=model/public/text-recognition-resnet-fc/FP16 --model_name=text-recognition-resnet-fc --input=input '--mean_values=input[127.5]' '--scale_values=input[127.5]' --output=output --input_model=model/public/text-recognition-resnet-fc/resnet_fc.onnx '--layout=input(NCHW)' '--input_shape=[1, 1, 32, 100]' --compress_to_fp16=True + Conversion command: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/bin/mo --framework=onnx --output_dir=model/public/text-recognition-resnet-fc/FP16 --model_name=text-recognition-resnet-fc --input=input '--mean_values=input[127.5]' '--scale_values=input[127.5]' --output=output --input_model=model/public/text-recognition-resnet-fc/resnet_fc.onnx '--layout=input(NCHW)' '--input_shape=[1, 1, 32, 100]' --compress_to_fp16=True [ INFO ] MO command line tool is considered as the legacy conversion API as of OpenVINO 2023.2 release. In 2025.0 MO command line tool and openvino.tools.mo.convert_model() will be removed. Please use OpenVINO Model Converter (OVC) or openvino.convert_model(). OVC represents a lightweight alternative of MO and provides simplified model conversion API. @@ -394,8 +385,8 @@ Converting text-recognition-resnet-fc… [ INFO ] Generated IR will be compressed to FP16. If you get lower accuracy, please consider disabling compression explicitly by adding argument --compress_to_fp16=False. Find more information about compression to FP16 at https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_FP16_Compression.html [ SUCCESS ] Generated IR version 11 model. - [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/optical-character-recognition/model/public/text-recognition-resnet-fc/FP16/text-recognition-resnet-fc.xml - [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/optical-character-recognition/model/public/text-recognition-resnet-fc/FP16/text-recognition-resnet-fc.bin + [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/optical-character-recognition/model/public/text-recognition-resnet-fc/FP16/text-recognition-resnet-fc.xml + [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/optical-character-recognition/model/public/text-recognition-resnet-fc/FP16/text-recognition-resnet-fc.bin diff --git a/docs/notebooks/optimize-preprocessing-with-output.rst b/docs/notebooks/optimize-preprocessing-with-output.rst index 2bfb2926bd44a8..9c5a60b553c26c 100644 --- a/docs/notebooks/optimize-preprocessing-with-output.rst +++ b/docs/notebooks/optimize-preprocessing-with-output.rst @@ -81,14 +81,8 @@ Settings .. 
code:: ipython3 - import platform - # Install openvino package - %pip install -q "openvino>=2023.1.0" opencv-python tqdm - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "openvino>=2023.1.0" opencv-python tqdm "matplotlib>=3.4" %pip install -q "tensorflow-macos>=2.5; sys_platform == 'darwin' and platform_machine == 'arm64' and python_version > '3.8'" # macOS M1 and M2 %pip install -q "tensorflow>=2.5; sys_platform == 'darwin' and platform_machine != 'arm64' and python_version > '3.8'" # macOS x86 @@ -103,7 +97,6 @@ Settings Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. Imports @@ -360,7 +353,7 @@ for mean/scale normalization. .. parsed-literal:: - + @@ -391,7 +384,7 @@ may be specified is input data .. parsed-literal:: - + @@ -429,7 +422,7 @@ then such conversion will be added explicitly. .. parsed-literal:: - + @@ -643,6 +636,6 @@ Compare performance .. parsed-literal:: - IR model in OpenVINO Runtime/CPU with manual image preprocessing: 0.0148 seconds per image, FPS: 67.77 - IR model in OpenVINO Runtime/CPU with preprocessing API: 0.0142 seconds per image, FPS: 70.46 + IR model in OpenVINO Runtime/CPU with manual image preprocessing: 0.0154 seconds per image, FPS: 65.10 + IR model in OpenVINO Runtime/CPU with preprocessing API: 0.0142 seconds per image, FPS: 70.53 diff --git a/docs/notebooks/paddle-ocr-webcam-with-output.rst b/docs/notebooks/paddle-ocr-webcam-with-output.rst index 6e0bcc263ed873..e5c426d1920f33 100644 --- a/docs/notebooks/paddle-ocr-webcam-with-output.rst +++ b/docs/notebooks/paddle-ocr-webcam-with-output.rst @@ -207,7 +207,7 @@ Download the Model for Text **Detection** .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-no… + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-no… .. parsed-literal:: @@ -253,7 +253,7 @@ Download the Model for Text **Recognition** .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-no… + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-no… .. 
parsed-literal:: diff --git a/docs/notebooks/paddle-ocr-webcam-with-output_files/paddle-ocr-webcam-with-output_30_0.png b/docs/notebooks/paddle-ocr-webcam-with-output_files/paddle-ocr-webcam-with-output_30_0.png index e6377775fd7b9b..43d5bfb39ed4e8 100644 --- a/docs/notebooks/paddle-ocr-webcam-with-output_files/paddle-ocr-webcam-with-output_30_0.png +++ b/docs/notebooks/paddle-ocr-webcam-with-output_files/paddle-ocr-webcam-with-output_30_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:972e260b6c702c732ac3d1cb7cb2d8e38c9fe8ea0bd7b8e783e74f5d5c8cc6f5 -size 591296 +oid sha256:e5f67b60f77e15fb0b7908ada4ff64ab506accc23f579c81193175e5044f8f3b +size 593679 diff --git a/docs/notebooks/paddle-to-openvino-classification-with-output.rst b/docs/notebooks/paddle-to-openvino-classification-with-output.rst index e3bb27d374c1aa..576878a578a323 100644 --- a/docs/notebooks/paddle-to-openvino-classification-with-output.rst +++ b/docs/notebooks/paddle-to-openvino-classification-with-output.rst @@ -63,7 +63,7 @@ Imports else: %pip install -q "paddlepaddle>=2.5.1" %pip install -q "paddleclas>=2.5.2" --no-deps - %pip install -q "prettytable" "ujson" "visualdl>=2.5.3" "faiss-cpu>=1.7.1" Pillow tqdm + %pip install -q "prettytable" "ujson" "visualdl>=2.5.3" "faiss-cpu>=1.7.1" Pillow tqdm "matplotlib>=3.4" # Install openvino package %pip install -q "openvino>=2023.1.0" @@ -89,11 +89,11 @@ Imports .. parsed-literal:: - --2024-10-08 03:19:19-- http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2.19_amd64.deb + --2024-10-23 02:12:15-- http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2.19_amd64.deb Resolving proxy-dmz.intel.com (proxy-dmz.intel.com)... 10.241.208.166 Connecting to proxy-dmz.intel.com (proxy-dmz.intel.com)|10.241.208.166|:911... connected. Proxy request sent, awaiting response... 404 Not Found - 2024-10-08 03:19:19 ERROR 404: Not Found. + 2024-10-23 02:12:15 ERROR 404: Not Found. dpkg: error: cannot access archive 'libssl1.1_1.1.1f-1ubuntu2.19_amd64.deb': No such file or directory @@ -124,8 +124,8 @@ Imports .. parsed-literal:: - 2024-10-08 03:19:21 INFO: Loading faiss with AVX512 support. - 2024-10-08 03:19:21 INFO: Successfully loaded faiss with AVX512 support. + 2024-10-23 02:12:17 INFO: Loading faiss with AVX512 support. + 2024-10-23 02:12:17 INFO: Successfully loaded faiss with AVX512 support. Settings @@ -209,7 +209,7 @@ inference on that image, and then show the top three prediction results. .. parsed-literal:: - [2024/10/08 03:19:44] ppcls WARNING: The current running environment does not support the use of GPU. CPU has been used instead. + [2024/10/23 02:12:43] ppcls WARNING: The current running environment does not support the use of GPU. CPU has been used instead. Labrador retriever, 0.75138 German short-haired pointer, 0.02373 Great Dane, 0.01848 @@ -275,7 +275,7 @@ clipping values. .. parsed-literal:: - 2024-10-08 03:19:45 WARNING: Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). + 2024-10-23 02:12:44 WARNING: Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). .. parsed-literal:: @@ -287,7 +287,7 @@ clipping values. .. parsed-literal:: - + @@ -462,7 +462,7 @@ Note that many optimizations are possible to improve the performance. .. 
parsed-literal:: - PaddlePaddle model on CPU: 0.0077 seconds per image, FPS: 130.66 + PaddlePaddle model on CPU: 0.0073 seconds per image, FPS: 137.19 PaddlePaddle result: Labrador retriever, 0.75138 @@ -523,7 +523,7 @@ select device from dropdown list for running inference using OpenVINO .. parsed-literal:: - OpenVINO IR model in OpenVINO Runtime (AUTO): 0.0026 seconds per image, FPS: 380.65 + OpenVINO IR model in OpenVINO Runtime (AUTO): 0.0026 seconds per image, FPS: 382.97 OpenVINO result: Labrador retriever, 0.74909 diff --git a/docs/notebooks/paint-by-example-with-output.rst b/docs/notebooks/paint-by-example-with-output.rst index ed3c0d2e0ab1d7..2f1371652c5750 100644 --- a/docs/notebooks/paint-by-example-with-output.rst +++ b/docs/notebooks/paint-by-example-with-output.rst @@ -58,7 +58,7 @@ This is the overall flow of the application: .. code:: ipython3 %pip install -q "torch>=2.1" torchvision --extra-index-url "https://download.pytorch.org/whl/cpu" - %pip install -q "diffusers>=0.25.0" "peft>=0.6.2" "openvino>=2023.2.0" "transformers>=4.25.1" ipywidgets opencv-python pillow "nncf>=2.7.0" "gradio==3.44.1" tqdm + %pip install -q "diffusers>=0.25.0" "peft>=0.6.2" "openvino>=2023.2.0" "transformers>=4.25.1" "matplotlib>=3.4" ipywidgets opencv-python pillow "nncf>=2.7.0" "gradio==3.44.1" tqdm Download the model from `HuggingFace Paint-by-Example `__. diff --git a/docs/notebooks/parler-tts-text-to-speech-with-output.rst b/docs/notebooks/parler-tts-text-to-speech-with-output.rst index 25e4d4bed03a7d..3bbb67d2232c99 100644 --- a/docs/notebooks/parler-tts-text-to-speech-with-output.rst +++ b/docs/notebooks/parler-tts-text-to-speech-with-output.rst @@ -75,7 +75,6 @@ Prerequisites Note: you may need to restart the kernel to use updated packages. ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. googleapis-common-protos 1.65.0 requires protobuf!=3.20.0,!=3.20.1,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0.dev0,>=3.20.2, but you have protobuf 3.19.6 which is incompatible. - mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.4.1+cpu which is incompatible. mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. onnx 1.16.1 requires protobuf>=3.20.2, but you have protobuf 3.19.6 which is incompatible. paddlepaddle 2.6.2 requires protobuf>=3.20.2; platform_system != "Windows", but you have protobuf 3.19.6 which is incompatible. @@ -118,11 +117,11 @@ Load the original model and inference .. parsed-literal:: - 2024-10-08 03:20:24.075446: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 03:20:24.108997: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-23 02:13:22.641328: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
+ 2024-10-23 02:13:22.675982: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. Flash attention 2 is not installed - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. WeightNorm.apply(module, name, dim) You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers The attention mask is not set and cannot be inferred from input because pad token is same as eos token.As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results. @@ -141,7 +140,7 @@ Load the original model and inference @@ -195,7 +194,7 @@ and Decoder (``ParlerTTSDecoder``). Lets convert them one by one. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4664: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4664: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( @@ -236,11 +235,11 @@ stage the model produces tokens during several runs. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/parler_tts/modeling_parler_tts.py:253: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/parler_tts/modeling_parler_tts.py:253: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if seq_len > self.weights.size(0): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/parler_tts/modeling_parler_tts.py:1599: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/parler_tts/modeling_parler_tts.py:1599: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
if sequence_length != 1:
- /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/parler_tts/modeling_parler_tts.py:802: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/parler_tts/modeling_parler_tts.py:802: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
if attn_output.size() != (bsz, self.num_heads, tgt_len, self.head_dim):
@@ -411,7 +410,7 @@ and run inference.
@@ -469,8 +468,6 @@ Interactive inference
Running on local URL: http://127.0.0.1:7860
- Thanks for being a Gradio user! If you have questions or feedback, please join our Discord server and chat with us: https://discord.gg/feTf9x3ZSB
-
To create a public link, set `share=True` in `launch()`.
diff --git a/docs/notebooks/person-tracking-with-output.rst b/docs/notebooks/person-tracking-with-output.rst
index b559c680b5503e..2b07305615f09e 100644
--- a/docs/notebooks/person-tracking-with-output.rst
+++ b/docs/notebooks/person-tracking-with-output.rst
@@ -128,22 +128,14 @@ Guide =2024.0.0"
- %pip install -q opencv-python requests scipy tqdm
-
- if platform.system() != "Windows":
- %pip install -q "matplotlib>=3.4"
- else:
- %pip install -q "matplotlib>=3.4,<3.7"
+ %pip install -q opencv-python requests scipy tqdm "matplotlib>=3.4"
.. parsed-literal::
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
- Note: you may need to restart the kernel to use updated packages.
Imports
@@ -372,7 +364,7 @@ Data Processing
includes data preprocess and postprocess functions.
- Data preprocess function is used to change the layout and shape of input data,
according to requirement of the network input format.
-- Data postprocess function is used to extract the useful information from
+- Datapostprocess function is used to extract the useful information from
network’s original output and visualize it.
..
code:: ipython3 diff --git a/docs/notebooks/person-tracking-with-output_files/person-tracking-with-output_25_0.png b/docs/notebooks/person-tracking-with-output_files/person-tracking-with-output_25_0.png index 8293308ef73078..92224b9dd2d944 100644 --- a/docs/notebooks/person-tracking-with-output_files/person-tracking-with-output_25_0.png +++ b/docs/notebooks/person-tracking-with-output_files/person-tracking-with-output_25_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eb8c9f580a9b1dfe6c67e1ee3b4f0d25a254ef01ccdb10234899873f17a13093 -size 218731 +oid sha256:d40f6ee0940455d5f7692589affef1fe1e8b44d9a3ae36d46d9a1b70599a6698 +size 219519 diff --git a/docs/notebooks/phi-3-vision-with-output.rst b/docs/notebooks/phi-3-vision-with-output.rst index bd1afd6a47a8ed..568072711ff2d8 100644 --- a/docs/notebooks/phi-3-vision-with-output.rst +++ b/docs/notebooks/phi-3-vision-with-output.rst @@ -17,9 +17,8 @@ post `__ - -**Table of contents:** +precision using `NNCF `__ #### +Table of contents: - `Prerequisites <#prerequisites>`__ - `Select Model <#select-model>`__ @@ -89,10 +88,10 @@ Select Model -The tutorial supports the following models from Phi-3 model family: - -`Phi-3.5-vision-instruct `__ -- -`Phi-3-vision-128k-instruct `__ +The tutorial supports the following models from Phi-3 model family: + +- `Phi-3.5-vision-instruct `__ +- `Phi-3-vision-128k-instruct `__ You can select one from the provided options below. @@ -265,10 +264,10 @@ documentation 1 or self.sliding_window is not None) and self.is_causal: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/4a0d683eba9f1d0cbfb6151705d1ee73c25a80ca/modeling_phi3_v.py:444: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! seq_len = seq_len or torch.max(position_ids) + 1 /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/4a0d683eba9f1d0cbfb6151705d1ee73c25a80ca/modeling_phi3_v.py:445: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if seq_len > self.original_max_position_embeddings: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:85: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. op1 = operator(\*args, \*\*kwargs) /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/4a0d683eba9f1d0cbfb6151705d1ee73c25a80ca/modeling_phi3_v.py:683: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len): @@ -365,7 +364,7 @@ documentation =3.9.2, but you have protobuf 3.20.3 which is incompatible. - mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.2.2+cpu which is incompatible. mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. paddleclas 2.5.2 requires gast==0.3.3, but you have gast 0.4.0 which is incompatible. paddleclas 2.5.2 requires opencv-python==4.6.0.66, but you have opencv-python 4.10.0.84 which is incompatible. @@ -200,10 +199,10 @@ PhotoMaker to generate the original PhotoMaker pipeline. .. parsed-literal:: - 2024-10-08 03:26:26.755777: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 03:26:26.790097: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-23 02:19:25.748160: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-23 02:19:25.783265: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 03:26:27.482079: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-23 02:19:26.449413: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. 
code:: ipython3 @@ -224,15 +223,6 @@ PhotoMaker to generate the original PhotoMaker pipeline. .. parsed-literal:: Loading PhotoMaker components [1] id_encoder from [/opt/home/k8sworker/.cache/huggingface/hub/models--TencentARC--PhotoMaker/snapshots/f68f8e6309bf213d28d68230abff0ccc92de9f30]... - - -.. parsed-literal:: - - The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable. - - -.. parsed-literal:: - Loading PhotoMaker components [2] lora_weights from [/opt/home/k8sworker/.cache/huggingface/hub/models--TencentARC--PhotoMaker/snapshots/f68f8e6309bf213d28d68230abff0ccc92de9f30] @@ -401,20 +391,20 @@ output(text embeddings) which will be the input for U-Net model. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4664: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4664: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/photo-maker/PhotoMaker/photomaker/model.py:84: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/photo-maker/PhotoMaker/photomaker/model.py:84: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert class_tokens_mask.sum() == stacked_id_embeds.shape[0], f"{class_tokens_mask.sum()} != {stacked_id_embeds.shape[0]}" .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 100% (151 / 151) │ 100% (151 / 151) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (151 / 151) │ 100% (151 / 151) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -478,20 +468,20 @@ sequence of latent text embeddings. .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:86: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:86: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if input_shape[-1] > 1 or self.sliding_window is not None: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 100% (73 / 73) │ 100% (73 / 73) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (73 / 73) │ 100% (73 / 73) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -510,11 +500,11 @@ sequence of latent text embeddings. .. 
parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 100% (194 / 194) │ 100% (194 / 194) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (194 / 194) │ 100% (194 / 194) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -584,26 +574,26 @@ original Stable Diffusion XL model. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/unets/unet_2d_condition.py:1103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/unets/unet_2d_condition.py:1103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if dim % default_overall_up_factor != 0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:136: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:136: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:145: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:145: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if hidden_states.shape[0] >= 64: .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 100% (794 / 794) │ 100% (794 / 794) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (794 / 794) │ 100% (794 / 794) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -659,11 +649,11 @@ VAE decoder. .. 
parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 100% (40 / 40) │ 100% (40 / 40) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (40 / 40) │ 100% (40 / 40) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ diff --git a/docs/notebooks/pixart-with-output.rst b/docs/notebooks/pixart-with-output.rst index 631e392394cfed..ef008d1d82ee52 100644 --- a/docs/notebooks/pixart-with-output.rst +++ b/docs/notebooks/pixart-with-output.rst @@ -118,10 +118,10 @@ directly in latent space, achieving super fast inference with few steps. .. parsed-literal:: - 2024-10-08 03:34:29.221746: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 03:34:29.256826: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-23 02:27:23.824587: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-23 02:27:23.860019: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 03:34:29.928193: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-23 02:27:24.531762: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -134,6 +134,7 @@ directly in latent space, achieving super fast inference with few steps. Some weights of the model checkpoint were not used when initializing PixArtTransformer2DModel: ['caption_projection.y_embedding'] + You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 @@ -142,11 +143,6 @@ directly in latent space, achieving super fast inference with few steps. Loading checkpoint shards: 0%| | 0/4 [00:00. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. 
If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - - .. parsed-literal:: @@ -233,7 +229,7 @@ Convert text encoder .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4664: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4664: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( @@ -275,11 +271,11 @@ Convert transformer .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/embeddings.py:219: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/embeddings.py:219: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if self.height != height or self.width != width: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/attention_processor.py:682: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/attention_processor.py:682: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if current_length != target_length: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/attention_processor.py:697: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/attention_processor.py:697: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attention_mask.shape[0] < batch_size * head_size: @@ -304,9 +300,9 @@ Convert VAE decoder .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if hidden_states.shape[0] >= 64: @@ -452,7 +448,7 @@ And insert wrappers instances in the pipeline: .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. 
deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -567,7 +563,7 @@ To collect intermediate model inputs for calibration we should customize .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -617,13 +613,12 @@ layers and (2) activations of other layers. The steps are the following: INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino INFO:nncf:1 ignored nodes were found by types in the NNCFGraph - INFO:nncf:1 ignored nodes were found by types in the NNCFGraph INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 100% (290 / 290) │ 100% (290 / 290) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (290 / 290) │ 100% (290 / 290) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -1551,13 +1546,13 @@ applied to footprint reduction. .. 
parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 3% (3 / 194) │ 0% (0 / 191) │ - ├───────────────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ int4_sym │ 97% (191 / 194) │ 100% (191 / 191) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 3% (3 / 194) │ 0% (0 / 191) │ + ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ + │ 4 │ 97% (191 / 194) │ 100% (191 / 191) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -1576,13 +1571,13 @@ applied to footprint reduction. .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 98% (37 / 40) │ 0% (0 / 3) │ - ├───────────────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ int4_sym │ 2% (3 / 40) │ 100% (3 / 3) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 98% (37 / 40) │ 0% (0 / 3) │ + ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ + │ 4 │ 2% (3 / 40) │ 100% (3 / 3) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -1653,7 +1648,7 @@ pipelines. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. 
deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -1710,9 +1705,9 @@ pipelines, we use mean inference time on 3 samples. .. parsed-literal:: - FP16 pipeline: 40.403 seconds - Optimized pipeline: 54.895 seconds - Performance speed-up: 0.736 + FP16 pipeline: 40.277 seconds + Optimized pipeline: 50.624 seconds + Performance speed-up: 0.796 Interactive inference @@ -1772,6 +1767,8 @@ to launch the interactive demo. Running on local URL: http://127.0.0.1:7860 + Thanks for being a Gradio user! If you have questions or feedback, please join our Discord server and chat with us: https://discord.gg/feTf9x3ZSB + To create a public link, set `share=True` in `launch()`. diff --git a/docs/notebooks/pixtral-with-output.rst b/docs/notebooks/pixtral-with-output.rst index f380b99cc7ee38..577aedf8550655 100644 --- a/docs/notebooks/pixtral-with-output.rst +++ b/docs/notebooks/pixtral-with-output.rst @@ -60,7 +60,7 @@ Prerequisites .. code:: ipython3 %pip install -q "torch>=2.1" torchvision "pillow" "tqdm" "gradio>=4.36" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "git+https://github.com/eaidova/optimum-intel.git@ea/llava_model" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "git+https://github.com/huggingface/optimum-intel.git" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q "nncf>=2.13.0" "openvino>=2024.4" %pip install -q "transformers>=4.45.0" --extra-index-url https://download.pytorch.org/whl/cpu @@ -71,7 +71,6 @@ Prerequisites Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.2.2+cpu which is incompatible. mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. parler-tts 0.2 requires transformers<=4.43.3,>=4.43.0, but you have transformers 4.45.2 which is incompatible. Note: you may need to restart the kernel to use updated packages. @@ -157,27 +156,21 @@ documentation True + Loading checkpoint shards: 100%|██████████████████| 6/6 [00:01<00:00, 3.46it/s] We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and will be removed in v4.47. Please convert your cache or use an appropriate `Cache` class (https://huggingface.co/docs/transformers/kv_cache#legacy-cache-format) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:447: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:447: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. 
or len(self.key_cache[layer_idx]) == 0 # the layer has no cache - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:432: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:432: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. elif len(self.key_cache[layer_idx]) == 0: # fills previously skipped layers; checking for tensor causes errors Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32) - Using framework PyTorch: 2.2.2+cpu [ WARNING ] Unexpectedly found already patched module language_model.model.embed_tokens while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. [ WARNING ] Unexpectedly found already patched module language_model.model.layers.0.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. [ WARNING ] Unexpectedly found already patched module language_model.model.layers.0.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. @@ -460,47 +453,41 @@ documentation False [ WARNING ] Unexpectedly found already patched module while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. Export model to OpenVINO directly failed with: Config dummy inputs are not a subset of the model inputs: {'input'} vs {'args', 'kwargs'}. Model will be exported to ONNX - Using framework PyTorch: 2.2.2+cpu - Overriding 1 configuration item(s) - - use_cache -> False - Saving external data to one file... + Exporting tokenizers to OpenVINO is not supported for tokenizers version > 0.19. Please downgrade to tokenizers version <= 0.19 to export tokenizers to OpenVINO. 
INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 6% (1 / 281) │ 0% (0 / 280) │ - ├───────────────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ int4_asym │ 94% (280 / 281) │ 100% (280 / 280) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ - Applying Weight Compression ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% • 0:05:07 • 0:00:00 + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 6% (1 / 281) │ 0% (0 / 280) │ + ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ + │ 4 │ 94% (280 / 281) │ 100% (280 / 280) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + Applying Weight Compression ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% • 0:05:12 • 0:00:00 INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 6% (3 / 172) │ 0% (0 / 169) │ - ├───────────────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ int4_asym │ 94% (169 / 172) │ 100% (169 / 169) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 6% (3 / 172) │ 0% (0 / 169) │ + ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ + │ 4 │ 94% (169 / 172) │ 100% (169 / 169) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ Applying Weight Compression ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% • 0:00:12 • 0:00:00 INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 100% (1 / 1) │ 0% (0 / 0) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (1 / 1) │ 0% (0 / 0) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ Applying Weight 
Compression ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% • 0:00:02 • 0:00:00 @@ -561,13 +548,10 @@ Intel can be found in .. parsed-literal:: - 2024-10-08 04:29:37.124362: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 04:29:37.158372: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-23 03:22:21.803644: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-23 03:22:21.838426: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 04:29:37.816800: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - Compiling the Language model to CPU ... - Compiling the Text embeddings model to CPU ... - Compiling the vision_embeddings to CPU ... + 2024-10-23 03:22:22.499374: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. code:: ipython3 @@ -611,7 +595,7 @@ Intel can be found in .. parsed-literal:: - The unusual aspect of this image is the presence of a cat and a dog lying together peacefully inside a cardboard box. This is not a common sight, as cats and dogs are often perceived as being natural enemies or at least not inclined to share spaces closely. The image portrays a harmonious and playful interaction between the two animals, which challenges typical stereotypes about their relationship. + The unusual aspect of this image is that the cat is lying inside a cardboard box, which is not a typical setting for a cat. Cats are often known for their affinity for boxes, but it is still considered unusual to see a cat comfortably resting inside a box in a living room setting. The cat appears relaxed and content, which adds to the charm of the scene. The presence of a sofa in the background further emphasizes the domestic and cozy atmosphere of the image. 
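The bitwidth-distribution tables printed above come from NNCF weight compression of the exported OpenVINO IR. As a minimal, hypothetical sketch of the kind of call that produces such a mixed 4-bit/8-bit layout (the model path, ``ratio`` and ``group_size`` below are illustrative assumptions, not values taken from this notebook):

.. code:: ipython3

    import nncf
    import openvino as ov

    core = ov.Core()
    # Illustrative path to one of the exported submodels
    model = core.read_model("model_dir/openvino_language_model.xml")

    # Compress most weight layers to 4-bit asymmetric; the remainder falls back to 8-bit
    compressed_model = nncf.compress_weights(
        model,
        mode=nncf.CompressWeightsMode.INT4_ASYM,
        ratio=1.0,       # share of ratio-defining layers moved to 4-bit (assumed)
        group_size=128,  # group-wise quantization granularity (assumed)
    )

    ov.save_model(compressed_model, "model_dir/openvino_language_model_int4.xml")

NNCF logs a bitwidth statistics table like the ones above while such a call runs.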
Interactive demo diff --git a/docs/notebooks/pose-estimation-with-output_files/pose-estimation-with-output_22_0.png b/docs/notebooks/pose-estimation-with-output_files/pose-estimation-with-output_22_0.png index acdee31b5fc986..555f528de6532e 100644 --- a/docs/notebooks/pose-estimation-with-output_files/pose-estimation-with-output_22_0.png +++ b/docs/notebooks/pose-estimation-with-output_files/pose-estimation-with-output_22_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c27bcedcb93b8775d73cdf416666ce2517ea493b556386f5f7ccd35d53ed15c3 -size 108140 +oid sha256:1909ffa4ca40de34e6ed63d20d6bbf34ec27667f31a937215382942546d817e7 +size 107972 diff --git a/docs/notebooks/pytorch-onnx-to-openvino-with-output.rst b/docs/notebooks/pytorch-onnx-to-openvino-with-output.rst index 906f8bbc4434de..88db2dcec22b0d 100644 --- a/docs/notebooks/pytorch-onnx-to-openvino-with-output.rst +++ b/docs/notebooks/pytorch-onnx-to-openvino-with-output.rst @@ -557,9 +557,9 @@ performance. .. parsed-literal:: - PyTorch model on CPU: 0.039 seconds per image, FPS: 25.34 - ONNX model in OpenVINO Runtime/AUTO: 0.018 seconds per image, FPS: 56.97 - OpenVINO IR model in OpenVINO Runtime/AUTO: 0.018 seconds per image, FPS: 55.62 + PyTorch model on CPU: 0.042 seconds per image, FPS: 23.58 + ONNX model in OpenVINO Runtime/AUTO: 0.017 seconds per image, FPS: 57.35 + OpenVINO IR model in OpenVINO Runtime/AUTO: 0.028 seconds per image, FPS: 36.13 **Show Device Information** @@ -588,7 +588,7 @@ References - `Torchvision `__ - `Pytorch ONNX Documentation `__ -- `PIP install openvino-dev `__ +- `PIP install openvino `__ - `OpenVINO ONNX support `__ - `Model Conversion API diff --git a/docs/notebooks/pytorch-post-training-quantization-nncf-with-output.rst b/docs/notebooks/pytorch-post-training-quantization-nncf-with-output.rst index d2b770f4402052..2c1a5c8d6e8107 100644 --- a/docs/notebooks/pytorch-post-training-quantization-nncf-with-output.rst +++ b/docs/notebooks/pytorch-post-training-quantization-nncf-with-output.rst @@ -159,7 +159,7 @@ Settings .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/pytorch-post-training-quantization-nncf/model/resnet50_fp32.pth') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/pytorch-post-training-quantization-nncf/model/resnet50_fp32.pth') @@ -449,15 +449,15 @@ I. Evaluate the loaded model .. 
parsed-literal:: - Test: [ 0/79] Time 0.290 (0.290) Acc@1 81.25 (81.25) Acc@5 92.19 (92.19) - Test: [10/79] Time 0.227 (0.240) Acc@1 56.25 (66.97) Acc@5 86.72 (87.50) - Test: [20/79] Time 0.232 (0.240) Acc@1 67.97 (64.29) Acc@5 85.16 (87.35) - Test: [30/79] Time 0.243 (0.239) Acc@1 53.12 (62.37) Acc@5 77.34 (85.33) - Test: [40/79] Time 0.251 (0.240) Acc@1 67.19 (60.86) Acc@5 90.62 (84.51) - Test: [50/79] Time 0.234 (0.240) Acc@1 60.16 (60.80) Acc@5 88.28 (84.42) - Test: [60/79] Time 0.242 (0.242) Acc@1 66.41 (60.46) Acc@5 86.72 (83.79) - Test: [70/79] Time 0.240 (0.242) Acc@1 52.34 (60.21) Acc@5 80.47 (83.33) - * Acc@1 60.740 Acc@5 83.960 Total time: 18.879 + Test: [ 0/79] Time 0.324 (0.324) Acc@1 81.25 (81.25) Acc@5 92.19 (92.19) + Test: [10/79] Time 0.320 (0.265) Acc@1 56.25 (66.97) Acc@5 86.72 (87.50) + Test: [20/79] Time 0.256 (0.262) Acc@1 67.97 (64.29) Acc@5 85.16 (87.35) + Test: [30/79] Time 0.262 (0.260) Acc@1 53.12 (62.37) Acc@5 77.34 (85.33) + Test: [40/79] Time 0.253 (0.260) Acc@1 67.19 (60.86) Acc@5 90.62 (84.51) + Test: [50/79] Time 0.256 (0.259) Acc@1 60.16 (60.80) Acc@5 88.28 (84.42) + Test: [60/79] Time 0.253 (0.258) Acc@1 66.41 (60.46) Acc@5 86.72 (83.79) + Test: [70/79] Time 0.256 (0.258) Acc@1 52.34 (60.21) Acc@5 80.47 (83.33) + * Acc@1 60.740 Acc@5 83.960 Total time: 20.127 Test accuracy of FP32 model: 60.740 @@ -500,10 +500,10 @@ Guide `__ - -**Table of contents:** +`NNCF `__ #### Table of +contents: - `Prerequisites <#prerequisites>`__ - `Convert and Optimize model <#convert-and-optimize-model>`__ @@ -79,11 +78,11 @@ Prerequisites from pathlib import Path import requests - + if not Path("ov_qwen2_audio_helper.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/qwen2-audio/ov_qwen2_audio_helper.py") open("ov_qwen2_audio_helper.py", "w").write(r.text) - + if not Path("notebook_utils.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py") open("notebook_utils.py", "w").write(r.text) @@ -212,13 +211,13 @@ documentation `__ .. code:: ipython3 from ov_qwen2_audio_helper import OVQwen2AudioForConditionalGeneration - + # Uncomment below lines to see the model inference class code # OVQwen2AudioForConditionalGeneration?? .. 
code:: ipython3 from notebook_utils import device_widget - + device = device_widget(default="AUTO", exclude=["NPU"]) - + device @@ -423,20 +422,20 @@ Run model inference from transformers import AutoProcessor, TextStreamer import librosa import IPython.display as ipd - - + + processor = AutoProcessor.from_pretrained(model_dir) - + audio_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-Audio/audio/1272-128104-0000.flac" audio_chat_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-Audio/audio/guess_age_gender.wav" audio_file = Path(audio_url.split("/")[-1]) audio_chat_file = Path(audio_chat_url.split("/")[-1]) - + if not audio_file.exists(): r = requests.get(audio_url) with audio_file.open("wb") as f: f.write(r.content) - + if not audio_chat_file.exists(): r = requests.get(audio_chat_url) with audio_chat_file.open("wb") as f: @@ -458,14 +457,14 @@ Voice chat ], }, ] - + text = processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False) audios = [librosa.load(audio_chat_file, sr=processor.feature_extractor.sampling_rate)[0]] - + inputs = processor(text=text, audios=audios, return_tensors="pt", padding=True) display(ipd.Audio(audio_chat_file)) print("Answer:") - + generate_ids = ov_model.generate(**inputs, max_new_tokens=50, streamer=TextStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)) @@ -477,7 +476,7 @@ Voice chat .. raw:: html - + - + .. parsed-literal:: @@ -367,7 +347,7 @@ can be found in the Reference: Il blog è uno strumento che si prefigge di incoraggiare la collaborazione e sviluppare l'apprendimento degli studenti ben oltre la giornata scolastica normale. Result: The blog is our tool that is prefilled to encourage collaboration and develop the learning of the students and to attract a normal school class. - + Download and convert model to OpenVINO IR via Optimum Intel CLI --------------------------------------------------------------- @@ -416,46 +396,6 @@ documentation `__. if exit_code != 0: raise Exception("Failed to load and convert model!") - -.. parsed-literal:: - - INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino - - - -**Export command:** - - - -``optimum-cli export openvino --model openai/whisper-tiny --library transformers --task automatic-speech-recognition-with-past --framework pt whisper-tiny`` - - -.. parsed-literal:: - - 2024-10-08 06:43:10.758697: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - Moving the following attributes in the config to the generation config: {'max_length': 448, 'suppress_tokens': [1, 2, 7, 8, 9, 10, 14, 25, 26, 27, 28, 29, 31, 58, 59, 60, 61, 62, 63, 90, 91, 92, 93, 359, 503, 522, 542, 873, 893, 902, 918, 922, 931, 1350, 1853, 1982, 2460, 2627, 3246, 3253, 3268, 3536, 3846, 3961, 4183, 4667, 6585, 6647, 7273, 9061, 9383, 10428, 10929, 11938, 12033, 12331, 12562, 13793, 14157, 14635, 15265, 15618, 16553, 16604, 18362, 18956, 20075, 21675, 22520, 26130, 26161, 26435, 28279, 29464, 31650, 32302, 32470, 36865, 42863, 47425, 49870, 50254, 50258, 50358, 50359, 50360, 50361, 50362], 'begin_suppress_tokens': [220, 50257]}. You are seeing this warning because you've set generation parameters in the model config, as opposed to in the generation config. 
- Using framework PyTorch: 2.3.1+cpu - Overriding 1 configuration item(s) - - use_cache -> False - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/whisper/modeling_whisper.py:1071: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if input_features.shape[-1] != expected_seq_length: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/whisper/modeling_whisper.py:388: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if attn_output.size() != (bsz, self.num_heads, tgt_len, self.head_dim): - Using framework PyTorch: 2.3.1+cpu - Overriding 1 configuration item(s) - - use_cache -> True - Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.43.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/whisper/modeling_whisper.py:101: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if sequence_length != 1: - Using framework PyTorch: 2.3.1+cpu - Overriding 1 configuration item(s) - - use_cache -> True - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:447: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. - or len(self.key_cache[layer_idx]) == 0 # the layer has no cache - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:432: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. - elif len(self.key_cache[layer_idx]) == 0: # fills previously skipped layers; checking for tensor causes errors - - Run inference OpenVINO model with WhisperPipeline ------------------------------------------------- @@ -496,11 +436,9 @@ and put array as input. .. 
code:: ipython3 - sample = copy.deepcopy(en_raw_speech) + genai_result = ov_pipe.generate(en_raw_speech) - genai_result = ov_pipe.generate(sample) - - display(ipd.Audio(sample, rate=samplerate)) + display(ipd.Audio(en_raw_speech, rate=samplerate)) print(f"Result: {genai_result}") @@ -509,16 +447,61 @@ and put array as input. - + .. parsed-literal:: - Result: Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel. + Result: Colonel Jessif, did you order the code rate? You don't have to answer that question. I'll answer the question. You want answers? I think I'm entitled. You want answers? I want the truth. You can't handle the truth. + + +Whisper could provide a phrase-level timestamps for audio. Let’s try +this scenario, we will specify ``return_timestamps=True`` for +``generate`` method. + +``generate`` method with ``return_timestamps`` set to ``True`` will +return ``chunks``, which contain attributes: ``text``, ``start_ts`` and +``end_ts`` in seconds. +.. code:: ipython3 + + genai_result_timestamps = ov_pipe.generate(en_raw_speech, return_timestamps=True) + + for segment in genai_result_timestamps.chunks: + print(f"{segment.start_ts}sec. ---> {segment.end_ts}sec.") + print(f"{segment.text}\n") + + +.. parsed-literal:: + + 0.0sec. ---> 3.0sec. + Colonel Jessif, did you order the code rate? + + 3.0sec. ---> 4.5sec. + You don't have to answer that question. + + 4.5sec. ---> 6.5sec. + I'll answer the question. + + 6.5sec. ---> 8.0sec. + You want answers? + + 8.0sec. ---> 9.0sec. + I think I'm entitled. + + 9.0sec. ---> 10.0sec. + You want answers? + + 10.0sec. ---> 11.0sec. + I want the truth. + + 11.0sec. ---> 13.0sec. + You can't handle the truth. + + Let’s see how to work the ``translate`` task. It supports for multilingual models only. For that case we will specify ``language`` and @@ -542,7 +525,7 @@ format. } if model_type.value == "Multilingual models": - sample = copy.deepcopy(mls_example["audio"]) + sample = mls_example["audio"] genai_result_ml = ov_pipe.generate(sample["array"], max_new_tokens=100, task="translate", language=languages_genai[SAMPLE_LANG.value]) @@ -559,7 +542,7 @@ format. Your browser does not support the audio element. - + .. parsed-literal:: @@ -567,7 +550,7 @@ format. Reference: Il blog è uno strumento che si prefigge di incoraggiare la collaborazione e sviluppare l'apprendimento degli studenti ben oltre la giornata scolastica normale. Result: The blog is our tool that is prefilled to encourage collaboration and develop the learning of the students and to attract a normal school class. - + Compare performance PyTorch vs OpenVINO --------------------------------------- @@ -621,10 +604,10 @@ Compare performance PyTorch vs OpenVINO .. parsed-literal:: - Mean torch openai/whisper-tiny generation time: 0.273s - Mean openvino openai/whisper-tiny generation time: 0.166s - Performance openai/whisper-tiny openvino speedup: 1.650 - + Mean torch openai/whisper-tiny generation time: 0.624s + Mean openvino openai/whisper-tiny generation time: 0.344s + Performance openai/whisper-tiny openvino speedup: 1.814 + Quantization ------------ @@ -706,13 +689,7 @@ Below is an example of the whisper-tiny model Like the original PyTorch model, the OpenVINO model is also compatible with HuggingFace `pipeline `__ -interface for ``automatic-speech-recognition``. Pipeline can be used for -long audio transcription. Distil-Whisper uses a chunked algorithm to -transcribe long-form audio files. 
In practice, this chunked long-form -algorithm is 9x faster than the sequential algorithm proposed by OpenAI -in the Whisper paper. To enable chunking, pass the chunk_length_s -parameter to the pipeline. For Distil-Whisper, a chunk length of 15 -seconds is optimal. To activate batching, pass the argument batch_size. +interface for ``automatic-speech-recognition``. .. code:: ipython3 @@ -721,14 +698,6 @@ seconds is optimal. To activate batching, pass the argument batch_size. ov_model = OVModelForSpeechSeq2Seq.from_pretrained(str(model_path), device=device.value) ov_processor = AutoProcessor.from_pretrained(str(model_path)) - -.. parsed-literal:: - - Compiling the encoder to CPU ... - Compiling the decoder to CPU ... - Compiling the decoder to CPU ... - - Prepare calibration datasets ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -797,14 +766,6 @@ negligible. from datasets import load_dataset from tqdm.notebook import tqdm - def extract_input_features(sample): - input_features = processor( - sample["audio"]["array"], - sampling_rate=sample["audio"]["sampling_rate"], - return_tensors="pt", - ).input_features - return input_features - CALIBRATION_DATASET_SIZE = 30 @@ -868,127 +829,6 @@ negligible. ov_quantized_pipe = quantize(ov_model, CALIBRATION_DATASET_SIZE) - - -.. parsed-literal:: - - Collecting calibration data: 0%| | 0/30 [00:00 - + Your browser does not support the audio element. - + .. parsed-literal:: - Original : Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel. - Quantized: Mr Quilder is the apostle of the middle classes and we are glad to welcome his gospel. - + Original : Colonel Jessif, did you order the code rate? You don't have to answer that question. I'll answer the question. You want answers? I think I'm entitled. You want answers? I want the truth. You can't handle the truth. + Quantized: Don, I'll just, if you order the code right. You don have to answer that question. I'll answer the question. You want answers. I think I'm entitled you want answer. I want the truth. You can't handle the truth. You can't handle the truth. + Compare performance and accuracy of the original and quantized models ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1096,10 +934,10 @@ for Word Error Rate. .. parsed-literal:: - Whole pipeline performance speedup: 1.381 + Whole pipeline performance speedup: 1.499 Whisper transcription word accuracy. Original model: 82.88%. Quantized model: 84.13%. Accuracy drop: -1.25%. - + Interactive demo ---------------- @@ -1115,7 +953,7 @@ upload button) or record using your microphone. import requests if not Path("gradio_helper.py").exists(): - r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/distil-whisper-asr/gradio_helper.py") + r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/whisper-asr-genai/gradio_helper.py") open("gradio_helper.py", "w").write(r.text) from gradio_helper import make_demo, GradioPipeline @@ -1127,23 +965,9 @@ upload button) or record using your microphone. demo = make_demo(gr_pipeline) try: - demo.launch(debug=False) + demo.launch(debug=True) except Exception: - demo.launch(share=True, debug=False) + demo.launch(share=True, debug=True) # if you are launching remotely, specify server_name and server_port # demo.launch(server_name='your server name', server_port='server port in int') # Read more in the docs: https://gradio.app/docs/ - - -.. 
parsed-literal:: - - Running on local URL: http://127.0.0.1:7860 - - To create a public link, set `share=True` in `launch()`. - - - - - - - diff --git a/docs/notebooks/whisper-subtitles-generation-with-output.rst b/docs/notebooks/whisper-subtitles-generation-with-output.rst index ad479f160f3453..8bfbfa8a6e86a3 100644 --- a/docs/notebooks/whisper-subtitles-generation-with-output.rst +++ b/docs/notebooks/whisper-subtitles-generation-with-output.rst @@ -18,8 +18,11 @@ blog `__, `model card `__ and GitHub `repository `__. -In this notebook, we will use Whisper with OpenVINO to generate -subtitles in a sample video. Additionally, we will use +In this notebook, we will use Whisper model with `OpenVINO Generate +API `__ for `Whisper +automatic speech recognition +scenarios `__ +to generate subtitles in a sample video. Additionally, we will use `NNCF `__ improving model performance by INT8 quantization. Notebook contains the following steps: 1. Download the model. 2. Instantiate the PyTorch model pipeline. 3. @@ -75,11 +78,23 @@ Install dependencies. .. code:: ipython3 - %pip install -q "openvino>=2024.1.0" "nncf>=2.10.0" - %pip install -q "python-ffmpeg<=1.0.16" moviepy "onnx!=1.16.2" "git+https://github.com/huggingface/optimum-intel.git" "torch>=2.1" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "yt_dlp>=2024.8.6" soundfile librosa jiwer + %pip install -q "nncf>=2.13.0" + %pip install -q --pre -U "openvino" "openvino-tokenizers" "openvino-genai" --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + %pip install -q "python-ffmpeg<=1.0.16" "ffmpeg" "moviepy" "onnx!=1.16.2" "git+https://github.com/huggingface/optimum-intel.git" "torch>=2.1" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q -U "yt_dlp>=2024.8.6" soundfile librosa jiwer %pip install -q "gradio>=4.19" +.. code:: ipython3 + + import requests + from pathlib import Path + + if not Path("notebook_utils.py").exists(): + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) + Instantiate model ----------------- @@ -135,7 +150,7 @@ Whisper family. .. parsed-literal:: - Dropdown(description='Model:', index=6, options=('openai/whisper-large-v3', 'openai/whisper-large-v2', 'openai… + Dropdown(description='Model:', index=7, options=('openai/whisper-large-v3-turbo', 'openai/whisper-large-v3', '… @@ -144,45 +159,31 @@ Convert model to OpenVINO Intermediate Representation (IR) format using Optimum- -The Hugging Face Optimum API is a high-level API that enables us to -convert and quantize models from the Hugging Face Transformers library -to the OpenVINO™ IR format. For more details, refer to the `Hugging Face -Optimum -documentation `__. +Listed Whisper model are available for downloading via the `HuggingFace +hub `__. We will use optimum-cli +interface for exporting it into OpenVINO Intermediate Representation +(IR) format. -Optimum Intel can be used to load optimized models from the `Hugging -Face Hub `__ and -create pipelines to run an inference with OpenVINO Runtime using Hugging -Face APIs. The Optimum Inference models are API compatible with Hugging -Face Transformers models. This means we just need to replace the -``AutoModelForXxx`` class with the corresponding ``OVModelForXxx`` -class. +Optimum CLI interface for converting models supports export to OpenVINO +(supported starting optimum-intel 1.12 version). 
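Before the general command format given below, a concrete invocation may help orientation. This sketch assumes the ``openai/whisper-tiny`` checkpoint that appears in the export log elsewhere in this document; the output directory name is only an illustration.

.. code:: bash

   # Illustrative export of a small Whisper checkpoint to OpenVINO IR with KV-cache support
   optimum-cli export openvino \
       --model openai/whisper-tiny \
       --task automatic-speech-recognition-with-past \
       whisper-tiny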
General command format: -Below is an example of the whisper-tiny model +.. code:: bash -.. code:: diff + optimum-cli export openvino --model --task - -from transformers import AutoModelForSpeechSeq2Seq - +from optimum.intel.openvino import OVModelForSpeechSeq2Seq - from transformers import AutoTokenizer, pipeline - - model_id = "openai/whisper-tiny" - -model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id) - +model = OVModelForSpeechSeq2Seq.from_pretrained(model_id, export=True) - -Model class initialization starts with calling the ``from_pretrained`` -method. When downloading and converting the Transformers model, the -parameter ``export=True`` should be added. We can save the converted -model for the next usage with the ``save_pretrained`` method. -Alternatively, model conversion can be performed using Optimum-CLI -interface. You can find more details about Optimum-Intel and Optimum CLI -usage in this `tutorial `__. -The command bellow illustrates how to convert whisper using optimum cli. +where ``--model`` argument is model id from HuggingFace Hub or local +directory with model (saved using ``.save_pretrained`` method), +``--task`` is one of `supported +task `__ +that exported model should solve. For LLMs it will be +``automatic-speech-recognition-with-past``. If model initialization +requires to use remote code, ``--trust-remote-code`` flag additionally +should be passed. Full list of supported arguments available via +``--help`` For more details and examples of usage, please check `optimum +documentation `__. .. code:: ipython3 - from pathlib import Path - model_dir = model_id.value.split("/")[-1] if not Path(model_dir).exists(): @@ -201,24 +202,12 @@ Whisper model. whisper_pipeline.png -Preprocessing and post-processing are important in this model use. -``transformers.AutoProcessor`` class used for initialization -``WhisperProcessor`` is responsible for preparing audio input data for -the PyTorch model, converting it to Mel-spectrogram and decoding -predicted output token_ids into string using tokenizer. Tokenizers and -Processors are distributed with models also compatible with the OpenVINO -model. - -Like the original PyTorch model, the OpenVINO model is also compatible -with HuggingFace -`pipeline `__ -interface for ``automatic-speech-recognition``. Pipeline can be used for -long audio transcription. Distil-Whisper uses a chunked algorithm to -transcribe long-form audio files. In practice, this chunked long-form -algorithm is 9x faster than the sequential algorithm proposed by OpenAI -in the Whisper paper. To enable chunking, pass the chunk_length_s -parameter to the pipeline. For Distil-Whisper, a chunk length of 15 -seconds is optimal. To activate batching, pass the argument batch_size. +To simplify user experience we will use `OpenVINO Generate +API `__. +Firstly we will create pipeline with ``WhisperPipeline``. You can +construct it straight away from the folder with the converted model. It +will automatically load the ``model``, ``tokenizer``, ``detokenizer`` +and default ``generation configuration``. Select inference device ~~~~~~~~~~~~~~~~~~~~~~~ @@ -227,12 +216,6 @@ Select inference device select device from dropdown list for running inference using OpenVINO -.. code:: ipython3 - - import openvino as ov - - core = ov.Core() - .. 
code:: ipython3 import requests @@ -244,7 +227,7 @@ select device from dropdown list for running inference using OpenVINO from notebook_utils import device_widget - device = device_widget() + device = device_widget(default="CPU", exclude=["NPU"]) device @@ -253,78 +236,46 @@ select device from dropdown list for running inference using OpenVINO .. parsed-literal:: - Dropdown(description='Device:', index=3, options=('CPU', 'GPU.0', 'GPU.1', 'AUTO'), value='AUTO') + Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU') .. code:: ipython3 - from optimum.intel.openvino import OVModelForSpeechSeq2Seq - from transformers import AutoProcessor, pipeline - - ov_model = OVModelForSpeechSeq2Seq.from_pretrained(model_dir, device=device.value) - - processor = AutoProcessor.from_pretrained(model_dir) + import openvino_genai - pipe = pipeline( - "automatic-speech-recognition", - model=ov_model, - chunk_length_s=30, - tokenizer=processor.tokenizer, - feature_extractor=processor.feature_extractor, - ) + ov_pipe = openvino_genai.WhisperPipeline(str(model_dir), device=device.value) Run video transcription pipeline -------------------------------- -Now, we are ready to start transcription. We select a video from YouTube -that we want to transcribe. Be patient, as downloading the video may -take some time. +Now, we are ready to start transcription. Let’s load the video first. .. code:: ipython3 - import ipywidgets as widgets + from notebook_utils import download_file - VIDEO_LINK = "https://youtu.be/kgL5LBM-hFI" - link = widgets.Text( - value=VIDEO_LINK, - placeholder="Type link for video", - description="Video:", - disabled=False, - ) + output_file = Path("downloaded_video.mp4") - link - - + download_file( + "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/video/Sheldon%20Cooper%20Jim%20Parsons%20at%20Intels%20Lab.mp4", + filename=output_file.name, + ) .. parsed-literal:: - Text(value='https://youtu.be/kgL5LBM-hFI', description='Video:', placeholder='Type link for video') + 'downloaded_video.mp4' already exists. -.. code:: ipython3 - - from pathlib import Path - import yt_dlp - - print(f"Downloading video {link.value} started") - - output_file = Path("downloaded_video.mp4") - ydl_ops = {"format": "best[ext=mp4]", "outtmpl": output_file.as_posix()} - with yt_dlp.YoutubeDL(ydl_ops) as ydl: - ydl.download(link.value) - - print(f"Video saved to {output_file}") - .. parsed-literal:: - Downloading video https://youtu.be/kgL5LBM-hFI started - Video saved to downloaded_video.mp4 + PosixPath('/home/labuser/work/notebook/openvino_notebooks/notebooks/whisper-subtitles-generation/downloaded_video.mp4') + Select the task for the model: @@ -377,17 +328,31 @@ Select the task for the model: input_video.audio.write_audiofile(audio_file, verbose=False, logger=None) with open(audio_file, "rb") as f: inputs = f.read() - audio = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate) + audio = ffmpeg_read(inputs, 16000) return { "raw": audio, - "sampling_rate": pipe.feature_extractor.sampling_rate, + "sampling_rate": 16000, }, duration +Let’s run generation method. We will put input data as ``np array``. +Also we will specify ``task`` and ``return_timestamps=True`` options. If +task is ``translate``, you can place ``language`` option, for example +``<|fr|>`` for French or it would be detect automatically. We can set up +generation parameters in different ways. We can get default config with +``get_generation_config()``, setup parameters and put config directly to +``generate()``. 
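A brief sketch of that config-based route is given here; the option values are illustrative assumptions rather than settings taken from this notebook, and ``ov_pipe``, ``get_audio`` and ``output_file`` are the objects defined above.

.. code:: ipython3

    # Sketch: fetch the default config, adjust it, and hand it to generate()
    config = ov_pipe.get_generation_config()
    config.task = "transcribe"        # or "translate" (assumed value)
    config.return_timestamps = True

    inputs, duration = get_audio(output_file)
    result = ov_pipe.generate(inputs["raw"], config)
    print(result.texts[0])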
It’s also possible to specify the needed options just as +inputs in the ``generate()`` method and we will use this way. Then we +just run ``generate`` method and get the output in text format. + +``generate`` method with ``return_timestamps`` set to ``True`` will +return ``chunks``, which contain attributes: ``text``, ``start_ts`` and +``end_ts`` + .. code:: ipython3 inputs, duration = get_audio(output_file) - transcription = pipe(inputs, generate_kwargs={"task": task.value}, return_timestamps=True)["chunks"] + transcription = ov_pipe.generate(inputs["raw"], task=task.value, return_timestamps=True).chunks .. code:: ipython3 @@ -419,18 +384,19 @@ Select the task for the model: """ segment_lines = [] for idx, segment in enumerate(transcription): + timestamp = (segment.start_ts, segment.end_ts) # for the case where the model could not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. - if segment["timestamp"][1] is None: - segment["timestamp"] = (segment["timestamp"][0], filter_duration) + if segment.end_ts == -1: + timestamp[1] = filter_duration - if filter_duration is not None and (segment["timestamp"][0] >= math.floor(filter_duration) or segment["timestamp"][1] > math.ceil(filter_duration) + 1): + if filter_duration is not None and (timestamp[0] >= math.floor(filter_duration) or timestamp[1] > math.ceil(filter_duration) + 1): break segment_lines.append(str(idx + 1) + "\n") - time_start = format_timestamp(segment["timestamp"][0]) - time_end = format_timestamp(segment["timestamp"][1]) + time_start = format_timestamp(timestamp[0]) + time_end = format_timestamp(timestamp[1]) time_str = f"{time_start} --> {time_end}\n" segment_lines.append(time_str) - segment_lines.append(segment["text"] + "\n\n") + segment_lines.append(segment.text + "\n\n") return segment_lines "The results will be saved in the ``downloaded_video.srt`` file. SRT is @@ -457,7 +423,7 @@ Now let us see the results. .. parsed-literal:: - Video(value=b"\x00\x00\x00\x18ftypmp42\x00\x00\x00\x00isommp42\x00\x00:'moov\x00\x00\x00lmvhd...", height='800… + Video(value=b'\x00\x00\x00\x18ftypmp42\x00\x00\x00\x00isommp42\x00\x00Aimoov\x00\x00\x00lmvhd...', height='800… @@ -565,6 +531,42 @@ Please select below whether you would like to run Whisper quantization. %load_ext skip_kernel_extension +Let’s load converted OpenVINO model format using Optimum-Intel to easily +quantize it. + +Optimum Intel can be used to load optimized models from the `Hugging +Face Hub `__ or +local folder to create pipelines to run an inference with OpenVINO +Runtime using Hugging Face APIs. The Optimum Inference models are API +compatible with Hugging Face Transformers models. This means we just +need to replace the ``AutoModelForXxx`` class with the corresponding +``OVModelForXxx`` class. + +Below is an example of the whisper-tiny model + +.. code:: diff + + -from transformers import AutoModelForSpeechSeq2Seq + +from optimum.intel.openvino import OVModelForSpeechSeq2Seq + from transformers import AutoTokenizer, pipeline + + model_id = "openai/whisper-tiny" + -model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id) + +model = OVModelForSpeechSeq2Seq.from_pretrained(model_id, export=True) + +Like the original PyTorch model, the OpenVINO model is also compatible +with HuggingFace +`pipeline `__ +interface for ``automatic-speech-recognition``. + +.. 
code:: ipython3 + + from transformers import AutoProcessor + from optimum.intel.openvino import OVModelForSpeechSeq2Seq + + ov_model = OVModelForSpeechSeq2Seq.from_pretrained(model_dir, device=device.value) + processor = AutoProcessor.from_pretrained(model_dir) + Prepare calibration datasets ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -583,6 +585,9 @@ improves quantization quality. %%skip not $to_quantize.value from itertools import islice + from tqdm.notebook import tqdm + from datasets import load_dataset + from transformers import pipeline from optimum.intel.openvino.quantization import InferRequestWrapper @@ -629,28 +634,16 @@ negligible. import gc import shutil import nncf - from datasets import load_dataset - from tqdm.notebook import tqdm - - def extract_input_features(sample): - input_features = processor( - sample["audio"]["array"], - sampling_rate=sample["audio"]["sampling_rate"], - return_tensors="pt", - ).input_features - return input_features - + import openvino as ov - CALIBRATION_DATASET_SIZE = 50 + CALIBRATION_DATASET_SIZE = 30 quantized_model_path = Path(f"{model_dir}_quantized") def quantize(ov_model: OVModelForSpeechSeq2Seq, calibration_dataset_size: int): if not quantized_model_path.exists(): - encoder_calibration_data, decoder_calibration_data = collect_calibration_dataset( - ov_model, calibration_dataset_size - ) + encoder_calibration_data, decoder_calibration_data = collect_calibration_dataset(ov_model, calibration_dataset_size) print("Quantizing encoder") quantized_encoder = nncf.quantize( ov_model.encoder.model, @@ -658,7 +651,7 @@ negligible. subset_size=len(encoder_calibration_data), model_type=nncf.ModelType.TRANSFORMER, # Smooth Quant algorithm reduces activation quantization error; optimal alpha value was obtained through grid search - advanced_parameters=nncf.AdvancedQuantizationParameters(smooth_quant_alpha=0.50) + advanced_parameters=nncf.AdvancedQuantizationParameters(smooth_quant_alpha=0.80), ) ov.save_model(quantized_encoder, quantized_model_path / "openvino_encoder_model.xml") del quantized_encoder @@ -672,7 +665,7 @@ negligible. subset_size=len(decoder_calibration_data), model_type=nncf.ModelType.TRANSFORMER, # Smooth Quant algorithm reduces activation quantization error; optimal alpha value was obtained through grid search - advanced_parameters=nncf.AdvancedQuantizationParameters(smooth_quant_alpha=0.96) + advanced_parameters=nncf.AdvancedQuantizationParameters(smooth_quant_alpha=0.96), ) ov.save_model(quantized_decoder_with_past, quantized_model_path / "openvino_decoder_with_past_model.xml") del quantized_decoder_with_past @@ -685,218 +678,24 @@ negligible. 
shutil.copy(model_path / "generation_config.json", quantized_model_path / "generation_config.json") shutil.copy(model_path / "openvino_decoder_model.xml", quantized_model_path / "openvino_decoder_model.xml") shutil.copy(model_path / "openvino_decoder_model.bin", quantized_model_path / "openvino_decoder_model.bin") + shutil.copy(model_path / "openvino_tokenizer.xml", quantized_model_path / "openvino_tokenizer.xml") + shutil.copy(model_path / "openvino_tokenizer.bin", quantized_model_path / "openvino_tokenizer.bin") + shutil.copy(model_path / "openvino_detokenizer.xml", quantized_model_path / "openvino_detokenizer.xml") + shutil.copy(model_path / "openvino_detokenizer.bin", quantized_model_path / "openvino_detokenizer.bin") + shutil.copy(model_path / "tokenizer_config.json", quantized_model_path / "tokenizer_config.json") + shutil.copy(model_path / "tokenizer.json", quantized_model_path / "tokenizer.json") + shutil.copy(model_path / "vocab.json", quantized_model_path / "vocab.json") + shutil.copy(model_path / "preprocessor_config.json", quantized_model_path / "preprocessor_config.json") + shutil.copy(model_path / "special_tokens_map.json", quantized_model_path / "special_tokens_map.json") + shutil.copy(model_path / "normalizer.json", quantized_model_path / "normalizer.json") + shutil.copy(model_path / "merges.txt", quantized_model_path / "merges.txt") + shutil.copy(model_path / "added_tokens.json", quantized_model_path / "added_tokens.json") - quantized_ov_model = OVModelForSpeechSeq2Seq.from_pretrained(quantized_model_path, compile=False) - quantized_ov_model.to(device.value) - quantized_ov_model.compile() - return quantized_ov_model + quantized_ov_pipe = openvino_genai.WhisperPipeline(str(quantized_model_path), device=device.value) + return quantized_ov_pipe - ov_quantized_model = quantize(ov_model, CALIBRATION_DATASET_SIZE) - - - -.. parsed-literal:: - - Collecting calibration data: 0%| | 0/50 [00:00 00:00:05,000 - What's that? - - 2 - 00:00:05,000 --> 00:00:07,000 - Oh, wow. - - 3 - 00:00:09,000 --> 00:00:11,000 - Hello humans. - - 4 - 00:00:14,000 --> 00:00:15,000 - Focus on me. - - 5 - 00:00:15,000 --> 00:00:16,000 - Focus on the guard. - - 6 - 00:00:18,000 --> 00:00:20,000 - Don't tell anyone what you're seen in here. - - 7 - 00:00:22,000 --> 00:00:24,000 - Have you seen what's in there? - - 8 - 00:00:24,000 --> 00:00:25,000 - They have intel. - - 9 - 00:00:25,000 --> 00:00:27,000 - This is where it all changes. - - - - Compare performance and accuracy of the original and quantized models ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -975,9 +725,6 @@ and performance stand-points. To measure accuracy, we use ``1 - WER`` as a metric, where WER stands for Word Error Rate. -When measuring inference time, we do it separately for encoder and -decoder-with-past model forwards, and for the whole model inference too. - .. code:: ipython3 %%skip not $to_quantize.value @@ -986,69 +733,34 @@ decoder-with-past model forwards, and for the whole model inference too. 
from contextlib import contextmanager from jiwer import wer, wer_standardize - TEST_DATASET_SIZE = 50 - MEASURE_TIME = False - - @contextmanager - def time_measurement(): - global MEASURE_TIME - try: - MEASURE_TIME = True - yield - finally: - MEASURE_TIME = False - - def time_fn(obj, fn_name, time_list): - original_fn = getattr(obj, fn_name) - - def wrapper(*args, **kwargs): - if not MEASURE_TIME: - return original_fn(\*args, \*\*kwargs) - start_time = time.perf_counter() - result = original_fn(\*args, \*\*kwargs) - end_time = time.perf_counter() - time_list.append(end_time - start_time) - return result - - setattr(obj, fn_name, wrapper) def calculate_transcription_time_and_accuracy(ov_model, test_samples): - encoder_infer_times = [] - decoder_with_past_infer_times = [] whole_infer_times = [] - time_fn(ov_model, "generate", whole_infer_times) - time_fn(ov_model.encoder, "forward", encoder_infer_times) - time_fn(ov_model.decoder_with_past, "forward", decoder_with_past_infer_times) ground_truths = [] predictions = [] for data_item in tqdm(test_samples, desc="Measuring performance and accuracy"): - input_features = extract_input_features(data_item) - - with time_measurement(): - predicted_ids = ov_model.generate(input_features) - transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True) + start_time = time.perf_counter() + transcription = ov_model.generate(data_item["audio"]["array"], return_timestamps=True) + end_time = time.perf_counter() + whole_infer_times.append(end_time - start_time) ground_truths.append(data_item["text"]) - predictions.append(transcription[0]) + predictions.append(transcription.texts[0]) word_accuracy = (1 - wer(ground_truths, predictions, reference_transform=wer_standardize, hypothesis_transform=wer_standardize)) * 100 mean_whole_infer_time = sum(whole_infer_times) - mean_encoder_infer_time = sum(encoder_infer_times) - mean_decoder_with_time_infer_time = sum(decoder_with_past_infer_times) - return word_accuracy, (mean_whole_infer_time, mean_encoder_infer_time, mean_decoder_with_time_infer_time) + return word_accuracy, mean_whole_infer_time test_dataset = load_dataset("openslr/librispeech_asr", "clean", split="validation", streaming=True, trust_remote_code=True) test_dataset = test_dataset.shuffle(seed=42).take(TEST_DATASET_SIZE) test_samples = [sample for sample in test_dataset] - accuracy_original, times_original = calculate_transcription_time_and_accuracy(ov_model, test_samples) - accuracy_quantized, times_quantized = calculate_transcription_time_and_accuracy(ov_quantized_model, test_samples) - print(f"Encoder performance speedup: {times_original[1] / times_quantized[1]:.3f}") - print(f"Decoder with past performance speedup: {times_original[2] / times_quantized[2]:.3f}") - print(f"Whole pipeline performance speedup: {times_original[0] / times_quantized[0]:.3f}") + accuracy_original, times_original = calculate_transcription_time_and_accuracy(ov_pipe, test_samples) + accuracy_quantized, times_quantized = calculate_transcription_time_and_accuracy(quantized_ov_pipe, test_samples) + print(f"Whole pipeline performance speedup: {times_original / times_quantized:.3f}") print(f"Whisper transcription word accuracy. Original model: {accuracy_original:.2f}%. Quantized model: {accuracy_quantized:.2f}%.") print(f"Accuracy drop: {accuracy_original - accuracy_quantized:.2f}%.") @@ -1067,11 +779,9 @@ decoder-with-past model forwards, and for the whole model inference too. .. 
parsed-literal:: - Encoder performance speedup: 1.352 - Decoder with past performance speedup: 1.342 - Whole pipeline performance speedup: 1.350 - Whisper transcription word accuracy. Original model: 81.67%. Quantized model: 83.67%. - Accuracy drop: -1.99%. + Whole pipeline performance speedup: 1.452 + Whisper transcription word accuracy. Original model: 81.77%. Quantized model: 82.97%. + Accuracy drop: -1.20%. Interactive demo @@ -1081,18 +791,26 @@ Interactive demo .. code:: ipython3 - def transcribe(url, task, use_int8): - output_file = Path("downloaded_video.mp4") - ydl_ops = {"format": "best[ext=mp4]", "outtmpl": output_file.as_posix()} - with yt_dlp.YoutubeDL(ydl_ops) as ydl: - ydl.download(link.value) - inputs, duration = get_audio(output_file) - m_pipe = int8_pipe if use_int8 else pipe - transcription = m_pipe(inputs, generate_kwargs={"task": task.lower()}, return_timestamps=True)["chunks"] + def_config = ov_pipe.get_generation_config() + + + def transcribe(video_path, task, use_int8): + data_path = Path(video_path) + inputs, duration = get_audio(data_path) + m_pipe = quantized_ov_pipe if use_int8 else ov_pipe + + frame_num = len(inputs["raw"]) / 16000 + if frame_num > 30: + config = ov_pipe.get_generation_config() + chink_num = math.ceil(frame_num / 30) + config.max_length = chink_num * def_config.max_length + m_pipe.set_generation_config(config) + + transcription = m_pipe.generate(inputs["raw"], task=task.lower(), return_timestamps=True).chunks srt_lines = prepare_srt(transcription, duration) - with output_file.with_suffix(".srt").open("w") as f: + with data_path.with_suffix(".srt").open("w") as f: f.writelines(srt_lines) - return [str(output_file), str(output_file.with_suffix(".srt"))] + return [str(data_path), str(data_path.with_suffix(".srt"))] if not Path("gradio_helper.py").exists(): @@ -1101,7 +819,7 @@ Interactive demo from gradio_helper import make_demo - demo = make_demo(fn=transcribe, quantized=ov_quantized_model is not None) + demo = make_demo(fn=transcribe, quantized=ov_quantized_model is not None, sample_path=output_file) try: demo.launch(debug=False) diff --git a/docs/notebooks/wuerstchen-image-generation-with-output.rst b/docs/notebooks/wuerstchen-image-generation-with-output.rst index 8e61a42f359dad..342ddbafdc7edb 100644 --- a/docs/notebooks/wuerstchen-image-generation-with-output.rst +++ b/docs/notebooks/wuerstchen-image-generation-with-output.rst @@ -230,7 +230,8 @@ parameter to generate a less memory-demanding model. Text encoder model has 2 inputs: - ``input_ids``: vector of tokenized input sentence. Default tokenizer vector length is 77. -- ``attention_mask``: vector of same length as ``input_ids`` describing the attention mask. +- ``attention_mask``: vector of same length as ``input_ids`` describing + the attention mask. .. code:: ipython3 @@ -284,7 +285,6 @@ Decoder pipeline Decoder pipeline consists of 3 parts: decoder, text encoder and VQGAN. Decoder model is the WuerstchenDiffNeXt UNet decoder. Inputs are: - - ``x``: sample - ``r``: timestep - ``effnet``: interpolation block diff --git a/docs/notebooks/yolov11-instance-segmentation-with-output.rst b/docs/notebooks/yolov11-instance-segmentation-with-output.rst index 931465dba1e79b..9d9b5e6bd15c0e 100644 --- a/docs/notebooks/yolov11-instance-segmentation-with-output.rst +++ b/docs/notebooks/yolov11-instance-segmentation-with-output.rst @@ -143,7 +143,7 @@ Import required utility functions. The lower cell will download the .. 
parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg') @@ -216,14 +216,14 @@ Let us consider the examples: .. parsed-literal:: - 100%|██████████| 5.90M/5.90M [00:00<00:00, 25.3MB/s] + 100%|██████████| 5.90M/5.90M [00:00<00:00, 25.1MB/s] .. parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 480x640 3 bicycles, 2 cars, 1 motorcycle, 1 dog, 66.0ms - Speed: 2.1ms preprocess, 66.0ms inference, 2.7ms postprocess per image at shape (1, 3, 480, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 480x640 3 bicycles, 2 cars, 1 motorcycle, 1 dog, 66.4ms + Speed: 1.8ms preprocess, 66.4ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640) @@ -252,15 +252,15 @@ preserve dynamic shapes in the model. .. parsed-literal:: - Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.3.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) + Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.4.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) PyTorch: starting from 'yolo11n-seg.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) ((1, 116, 8400), (1, 32, 160, 160)) (5.9 MB) - OpenVINO: starting export with openvino 2024.5.0-16913-890f2e12c98... + OpenVINO: starting export with openvino 2024.5.0-16993-9c432a3641a... OpenVINO: export success ✅ 2.0s, saved as 'yolo11n-seg_openvino_model/' (6.0 MB) Export complete (2.2s) - Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization + Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization Predict: yolo predict task=segment model=yolo11n-seg_openvino_model imgsz=640 half Validate: yolo val task=segment model=yolo11n-seg_openvino_model imgsz=640 data=/ultralytics/ultralytics/cfg/datasets/coco.yaml half Visualize: https://netron.app @@ -331,7 +331,7 @@ Test on single image .. parsed-literal:: - Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.3.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) + Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.4.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) Loading yolo11n-seg_openvino_model for OpenVINO inference... Using OpenVINO LATENCY mode for batch=1 inference... @@ -345,8 +345,8 @@ Test on single image .. 
parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 640x640 3 bicycles, 2 cars, 1 dog, 23.1ms - Speed: 2.1ms preprocess, 23.1ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 640x640 3 bicycles, 2 cars, 1 dog, 23.2ms + Speed: 3.6ms preprocess, 23.2ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 640) @@ -668,8 +668,8 @@ on the image. .. parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 640x640 2 bicycles, 2 cars, 1 dog, 10.9ms - Speed: 2.0ms preprocess, 10.9ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 640x640 2 bicycles, 2 cars, 1 dog, 17.7ms + Speed: 2.1ms preprocess, 17.7ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 640) @@ -717,18 +717,18 @@ models. [ INFO ] Parsing input parameters [Step 2/11] Loading OpenVINO Runtime [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 19.71 ms + [ INFO ] Read model took 20.05 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [?,3,?,?] @@ -738,7 +738,7 @@ models. [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'x': [1,3,640,640] - [ INFO ] Reshape model took 5.12 ms + [ INFO ] Reshape model took 8.82 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] x (node: x) : u8 / [N,C,H,W] / [1,3,640,640] @@ -746,7 +746,7 @@ models. [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_8) : f32 / [...] / [1,116,8400] [ INFO ] input.255 (node: __module.model.23.cv4.2.1.act/aten::silu_/Swish_46) : f32 / [...] / [1,32,160,160] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 385.36 ms + [ INFO ] Compile model took 387.38 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -783,17 +783,17 @@ models. [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 37.32 ms + [ INFO ] First inference took 36.42 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 1794 iterations - [ INFO ] Duration: 15062.45 ms + [ INFO ] Count: 1788 iterations + [ INFO ] Duration: 15050.22 ms [ INFO ] Latency: - [ INFO ] Median: 50.09 ms - [ INFO ] Average: 50.21 ms - [ INFO ] Min: 43.19 ms - [ INFO ] Max: 65.81 ms - [ INFO ] Throughput: 119.10 FPS + [ INFO ] Median: 49.80 ms + [ INFO ] Average: 50.35 ms + [ INFO ] Min: 33.27 ms + [ INFO ] Max: 104.15 ms + [ INFO ] Throughput: 118.80 FPS .. code:: ipython3 @@ -808,18 +808,18 @@ models. [ INFO ] Parsing input parameters [Step 2/11] Loading OpenVINO Runtime [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 29.59 ms + [ INFO ] Read model took 29.13 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [1,3,640,640] @@ -837,7 +837,7 @@ models. [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_8) : f32 / [...] / [1,116,8400] [ INFO ] input.255 (node: __module.model.23.cv4.2.1.act/aten::silu_/Swish_46) : f32 / [...] / [1,32,160,160] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 608.94 ms + [ INFO ] Compile model took 594.13 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -874,17 +874,17 @@ models. [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 26.24 ms + [ INFO ] First inference took 27.63 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 3636 iterations - [ INFO ] Duration: 15046.89 ms + [ INFO ] Count: 3714 iterations + [ INFO ] Duration: 15026.92 ms [ INFO ] Latency: - [ INFO ] Median: 24.53 ms - [ INFO ] Average: 24.70 ms - [ INFO ] Min: 12.80 ms - [ INFO ] Max: 40.79 ms - [ INFO ] Throughput: 241.64 FPS + [ INFO ] Median: 23.95 ms + [ INFO ] Average: 24.14 ms + [ INFO ] Min: 17.70 ms + [ INFO ] Max: 39.05 ms + [ INFO ] Throughput: 247.16 FPS Other ways to optimize model diff --git a/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_46_0.png b/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_46_0.png index 32694d34889741..011e13859e8a3e 100644 --- a/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_46_0.png +++ b/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_46_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ceb4f1951cf92d0a2365750af32b945a90f65102311933328225f2dbd7b802c1 -size 496675 +oid sha256:e393b8edd8a20af1aaab6b69932bf0e715bf4f9c35814dd766dfa74ae27f2ae4 +size 493271 diff --git a/docs/notebooks/yolov11-keypoint-detection-with-output.rst b/docs/notebooks/yolov11-keypoint-detection-with-output.rst index cc830279de6753..97c549e6751dd0 100644 --- a/docs/notebooks/yolov11-keypoint-detection-with-output.rst +++ b/docs/notebooks/yolov11-keypoint-detection-with-output.rst @@ -143,7 +143,7 @@ Import required utility functions. The lower cell will download the .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/intel_rnb.jpg') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/intel_rnb.jpg') @@ -214,14 +214,14 @@ Let us consider the examples: .. parsed-literal:: - 100%|██████████| 5.97M/5.97M [00:00<00:00, 25.1MB/s] + 100%|██████████| 5.97M/5.97M [00:00<00:00, 25.3MB/s] .. parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/intel_rnb.jpg: 480x640 1 person, 56.4ms - Speed: 1.9ms preprocess, 56.4ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/intel_rnb.jpg: 480x640 1 person, 59.6ms + Speed: 2.1ms preprocess, 59.6ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 640) @@ -250,15 +250,15 @@ preserve dynamic shapes in the model. .. parsed-literal:: - Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.3.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) + Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.4.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) PyTorch: starting from 'yolo11n-pose.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 56, 8400) (6.0 MB) - OpenVINO: starting export with openvino 2024.5.0-16913-890f2e12c98... 
- OpenVINO: export success ✅ 1.9s, saved as 'yolo11n-pose_openvino_model/' (6.0 MB) + OpenVINO: starting export with openvino 2024.5.0-16993-9c432a3641a... + OpenVINO: export success ✅ 2.0s, saved as 'yolo11n-pose_openvino_model/' (6.0 MB) - Export complete (2.2s) - Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization + Export complete (2.1s) + Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization Predict: yolo predict task=pose model=yolo11n-pose_openvino_model imgsz=640 half Validate: yolo val task=pose model=yolo11n-pose_openvino_model imgsz=640 data=/ultralytics/ultralytics/cfg/datasets/coco-pose.yaml half Visualize: https://netron.app @@ -334,12 +334,12 @@ ready to check model prediction. .. parsed-literal:: - Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.3.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) + Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.4.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) Loading yolo11n-pose_openvino_model for OpenVINO inference... Using OpenVINO LATENCY mode for batch=1 inference... - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/intel_rnb.jpg: 640x640 1 person, 21.5ms - Speed: 2.5ms preprocess, 21.5ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/intel_rnb.jpg: 640x640 1 person, 19.9ms + Speed: 2.3ms preprocess, 19.9ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640) @@ -682,12 +682,12 @@ on the image. .. parsed-literal:: - Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.3.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) + Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.4.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) Loading yolo11n-pose_openvino_model for OpenVINO inference... Using OpenVINO LATENCY mode for batch=1 inference... - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/intel_rnb.jpg: 640x640 1 person, 31.0ms - Speed: 2.1ms preprocess, 31.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/intel_rnb.jpg: 640x640 1 person, 28.0ms + Speed: 2.0ms preprocess, 28.0ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640) @@ -737,18 +737,18 @@ models. [Step 2/11] Loading OpenVINO Runtime [ WARNING ] Default duration 120 seconds is used for unknown device AUTO [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. 
[Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 19.80 ms + [ INFO ] Read model took 19.55 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [?,3,?,?] @@ -757,14 +757,14 @@ models. [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'x': [1,3,640,640] - [ INFO ] Reshape model took 5.17 ms + [ INFO ] Reshape model took 8.54 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] x (node: x) : u8 / [N,C,H,W] / [1,3,640,640] [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_9) : f32 / [...] / [1,56,8400] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 338.05 ms + [ INFO ] Compile model took 329.35 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -801,17 +801,17 @@ models. [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 120000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 33.00 ms + [ INFO ] First inference took 33.67 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 17292 iterations - [ INFO ] Duration: 120045.86 ms + [ INFO ] Count: 17280 iterations + [ INFO ] Duration: 120049.75 ms [ INFO ] Latency: - [ INFO ] Median: 40.71 ms - [ INFO ] Average: 41.52 ms - [ INFO ] Min: 29.33 ms - [ INFO ] Max: 102.50 ms - [ INFO ] Throughput: 144.04 FPS + [ INFO ] Median: 40.74 ms + [ INFO ] Average: 41.55 ms + [ INFO ] Min: 24.20 ms + [ INFO ] Max: 98.48 ms + [ INFO ] Throughput: 143.94 FPS .. code:: ipython3 @@ -827,18 +827,18 @@ models. [ INFO ] Parsing input parameters [Step 2/11] Loading OpenVINO Runtime [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 29.03 ms + [ INFO ] Read model took 28.39 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [1,3,640,640] @@ -854,7 +854,7 @@ models. [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_9) : f32 / [...] / [1,56,8400] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 558.98 ms + [ INFO ] Compile model took 556.59 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -891,17 +891,17 @@ models. [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 31.29 ms + [ INFO ] First inference took 30.44 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 5184 iterations - [ INFO ] Duration: 15060.72 ms + [ INFO ] Count: 5160 iterations + [ INFO ] Duration: 15031.11 ms [ INFO ] Latency: - [ INFO ] Median: 34.51 ms - [ INFO ] Average: 34.67 ms - [ INFO ] Min: 21.44 ms - [ INFO ] Max: 51.10 ms - [ INFO ] Throughput: 344.21 FPS + [ INFO ] Median: 34.62 ms + [ INFO ] Average: 34.76 ms + [ INFO ] Min: 25.46 ms + [ INFO ] Max: 51.89 ms + [ INFO ] Throughput: 343.29 FPS Compare accuracy of the Original and Quantized Models diff --git a/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_43_0.png b/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_43_0.png index a03840fa0f4bec..f4e1de3947dc95 100644 --- a/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_43_0.png +++ b/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_43_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b6b92af78802b6615cc954b803042caa64f3969c1947652978beaaf6481a5104 -size 507485 +oid sha256:44dd0441baec6d73d3cc7552c26d2878c806514edacb3da332031733686c654b +size 507432 diff --git a/docs/notebooks/yolov11-object-detection-with-output.rst b/docs/notebooks/yolov11-object-detection-with-output.rst index 40256995ebd00f..d987f4148e7265 100644 --- a/docs/notebooks/yolov11-object-detection-with-output.rst +++ b/docs/notebooks/yolov11-object-detection-with-output.rst @@ -141,7 +141,7 @@ Import required utility functions. The lower cell will download the .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg') @@ -207,14 +207,14 @@ Let us consider the examples: .. parsed-literal:: - 100%|██████████| 5.35M/5.35M [00:00<00:00, 24.0MB/s] + 100%|██████████| 5.35M/5.35M [00:00<00:00, 23.2MB/s] .. parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 480x640 2 bicycles, 2 cars, 2 dogs, 79.4ms - Speed: 2.5ms preprocess, 79.4ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 480x640 2 bicycles, 2 cars, 2 dogs, 78.0ms + Speed: 2.3ms preprocess, 78.0ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 640) @@ -243,15 +243,15 @@ preserve dynamic shapes in the model. .. parsed-literal:: - Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.3.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) + Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.4.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) PyTorch: starting from 'yolo11n.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (5.4 MB) - OpenVINO: starting export with openvino 2024.5.0-16913-890f2e12c98... + OpenVINO: starting export with openvino 2024.5.0-16993-9c432a3641a... 
OpenVINO: export success ✅ 1.8s, saved as 'yolo11n_openvino_model/' (5.4 MB) - Export complete (2.0s) - Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization + Export complete (1.9s) + Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization Predict: yolo predict task=detect model=yolo11n_openvino_model imgsz=640 half Validate: yolo val task=detect model=yolo11n_openvino_model imgsz=640 data=/usr/src/ultralytics/ultralytics/cfg/datasets/coco.yaml half Visualize: https://netron.app @@ -326,12 +326,12 @@ ready to check model prediction for object detection. .. parsed-literal:: - Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.3.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) + Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.4.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) Loading yolo11n_openvino_model for OpenVINO inference... Using OpenVINO LATENCY mode for batch=1 inference... - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 640x640 1 bicycle, 2 cars, 1 dog, 20.7ms - Speed: 2.8ms preprocess, 20.7ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 640x640 1 bicycle, 2 cars, 1 dog, 18.7ms + Speed: 2.0ms preprocess, 18.7ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640) @@ -645,8 +645,8 @@ on the image. .. parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 640x640 1 bicycle, 2 cars, 1 dog, 17.5ms - Speed: 1.8ms preprocess, 17.5ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 640x640 1 bicycle, 2 cars, 1 dog, 19.7ms + Speed: 1.8ms preprocess, 19.7ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640) @@ -697,18 +697,18 @@ models. [Step 2/11] Loading OpenVINO Runtime [ WARNING ] Default duration 120 seconds is used for unknown device AUTO [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 18.57 ms + [ INFO ] Read model took 18.73 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [?,3,?,?] @@ -717,14 +717,14 @@ models. 
[Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'x': [1,3,640,640] - [ INFO ] Reshape model took 4.68 ms + [ INFO ] Reshape model took 8.03 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] x (node: x) : u8 / [N,C,H,W] / [1,3,640,640] [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_7) : f32 / [...] / [1,84,8400] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 319.17 ms + [ INFO ] Compile model took 316.96 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -761,17 +761,17 @@ models. [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 120000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 31.01 ms + [ INFO ] First inference took 30.27 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 18528 iterations - [ INFO ] Duration: 120050.83 ms + [ INFO ] Count: 18480 iterations + [ INFO ] Duration: 120044.94 ms [ INFO ] Latency: - [ INFO ] Median: 37.96 ms - [ INFO ] Average: 38.74 ms - [ INFO ] Min: 19.79 ms - [ INFO ] Max: 98.97 ms - [ INFO ] Throughput: 154.33 FPS + [ INFO ] Median: 38.09 ms + [ INFO ] Average: 38.84 ms + [ INFO ] Min: 20.45 ms + [ INFO ] Max: 97.13 ms + [ INFO ] Throughput: 153.94 FPS .. code:: ipython3 @@ -787,18 +787,18 @@ models. [ INFO ] Parsing input parameters [Step 2/11] Loading OpenVINO Runtime [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 26.43 ms + [ INFO ] Read model took 26.48 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [1,3,640,640] @@ -814,7 +814,7 @@ models. [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_7) : f32 / [...] / [1,84,8400] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 515.22 ms + [ INFO ] Compile model took 556.87 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -851,17 +851,17 @@ models. [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 19.14 ms + [ INFO ] First inference took 31.34 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 5184 iterations - [ INFO ] Duration: 15055.81 ms + [ INFO ] Count: 5208 iterations + [ INFO ] Duration: 15031.88 ms [ INFO ] Latency: - [ INFO ] Median: 34.47 ms - [ INFO ] Average: 34.65 ms - [ INFO ] Min: 18.19 ms - [ INFO ] Max: 51.47 ms - [ INFO ] Throughput: 344.32 FPS + [ INFO ] Median: 34.21 ms + [ INFO ] Average: 34.44 ms + [ INFO ] Min: 18.94 ms + [ INFO ] Max: 52.48 ms + [ INFO ] Throughput: 346.46 FPS Next steps diff --git a/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_43_0.png b/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_43_0.png index f6ad963c53f95c..02e6e38bb12a7b 100644 --- a/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_43_0.png +++ b/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_43_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0ed070bb0687ea1e958c91f59b98f38b9834911f23f78c1a8aad55bed3c7cf6d -size 569483 +oid sha256:0904f4ceeae66d4c516656b23389b16d93bc601214a4c9b8d1ee30f7b5366526 +size 572206 diff --git a/docs/notebooks/yolov8-obb-with-output.rst b/docs/notebooks/yolov8-obb-with-output.rst index 87e73f76558ffa..3d9ef3d88c0751 100644 --- a/docs/notebooks/yolov8-obb-with-output.rst +++ b/docs/notebooks/yolov8-obb-with-output.rst @@ -173,6 +173,7 @@ instance. + Run inference ~~~~~~~~~~~~~ diff --git a/docs/notebooks/yolov9-optimization-with-output.rst b/docs/notebooks/yolov9-optimization-with-output.rst index 0cf84003171753..39690ea292721d 100644 --- a/docs/notebooks/yolov9-optimization-with-output.rst +++ b/docs/notebooks/yolov9-optimization-with-output.rst @@ -58,22 +58,15 @@ Guide =2023.3.0" "nncf>=2.8.1" "opencv-python" "seaborn" "pandas" "scikit-learn" "torch" "torchvision" "tqdm" --extra-index-url https://download.pytorch.org/whl/cpu - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "openvino>=2023.3.0" "nncf>=2.8.1" "opencv-python" "matplotlib>=3.4" "seaborn" "pandas" "scikit-learn" "torch" "torchvision" "tqdm" --extra-index-url https://download.pytorch.org/whl/cpu .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. .. code:: ipython3 @@ -100,9 +93,9 @@ Prerequisites Cloning into 'yolov9'... remote: Enumerating objects: 781, done. remote: Total 781 (delta 0), reused 0 (delta 0), pack-reused 781 (from 1) - Receiving objects: 100% (781/781), 3.27 MiB | 10.53 MiB/s, done. - Resolving deltas: 100% (330/330), done. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9 + Receiving objects: 100% (781/781), 3.27 MiB | 19.93 MiB/s, done. + Resolving deltas: 100% (331/331), done. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9 Get PyTorch model @@ -140,7 +133,7 @@ applicable for other models from YOLO V9 family. .. 
parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9/model/gelan-c.pt') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9/model/gelan-c.pt') @@ -195,9 +188,11 @@ using ``ov.save_model``. .. parsed-literal:: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9/models/experimental.py:243: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + ckpt = torch.load(attempt_download(w), map_location='cpu') # load Fusing layers... Model summary: 387 layers, 25288768 parameters, 0 gradients, 102.1 GFLOPs - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9/models/yolo.py:108: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/801/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9/models/yolo.py:108: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! elif self.dynamic or self.shape != shape: @@ -578,10 +573,10 @@ asymmetric quantization of activations. .. parsed-literal:: - 2024-10-08 06:55:28.833031: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 06:55:28.867508: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-23 05:32:14.632848: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
+ 2024-10-23 05:32:14.668324: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 06:55:29.471960: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-10-23 05:32:15.276616: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -669,18 +664,18 @@ models. [ INFO ] Parsing input parameters [Step 2/11] Loading OpenVINO Runtime [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 26.27 ms + [ INFO ] Read model took 26.16 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] images (node: x) : f32 / [...] / [?,3,?,?] @@ -692,7 +687,7 @@ models. [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'images': [1,3,640,640] - [ INFO ] Reshape model took 7.74 ms + [ INFO ] Reshape model took 7.97 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] images (node: x) : u8 / [N,C,H,W] / [1,3,640,640] @@ -702,7 +697,7 @@ models. [ INFO ] xi.3 (node: __module.model.22/aten::cat/Concat_1) : f32 / [...] / [1,144,40,40] [ INFO ] xi (node: __module.model.22/aten::cat/Concat) : f32 / [...] / [1,144,20,20] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 487.17 ms + [ INFO ] Compile model took 475.37 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -739,17 +734,17 @@ models. [ INFO ] Fill input 'images' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 173.19 ms + [ INFO ] First inference took 182.45 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 222 iterations - [ INFO ] Duration: 15646.59 ms + [ INFO ] Count: 228 iterations + [ INFO ] Duration: 15670.39 ms [ INFO ] Latency: - [ INFO ] Median: 412.14 ms - [ INFO ] Average: 418.38 ms - [ INFO ] Min: 285.25 ms - [ INFO ] Max: 798.40 ms - [ INFO ] Throughput: 14.19 FPS + [ INFO ] Median: 412.82 ms + [ INFO ] Average: 410.86 ms + [ INFO ] Min: 309.65 ms + [ INFO ] Max: 431.51 ms + [ INFO ] Throughput: 14.55 FPS .. code:: ipython3 @@ -763,18 +758,18 @@ models. [ INFO ] Parsing input parameters [Step 2/11] Loading OpenVINO Runtime [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 
2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 41.47 ms + [ INFO ] Read model took 41.07 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] images (node: x) : f32 / [...] / [1,3,640,640] @@ -786,7 +781,7 @@ models. [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'images': [1,3,640,640] - [ INFO ] Reshape model took 0.04 ms + [ INFO ] Reshape model took 0.05 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] images (node: x) : u8 / [N,C,H,W] / [1,3,640,640] @@ -796,7 +791,7 @@ models. [ INFO ] xi.3 (node: __module.model.22/aten::cat/Concat_1) : f32 / [...] / [1,144,40,40] [ INFO ] xi (node: __module.model.22/aten::cat/Concat) : f32 / [...] / [1,144,20,20] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 870.18 ms + [ INFO ] Compile model took 943.07 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -833,17 +828,17 @@ models. [ INFO ] Fill input 'images' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 71.43 ms + [ INFO ] First inference took 64.84 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 726 iterations - [ INFO ] Duration: 15150.76 ms + [ INFO ] Count: 720 iterations + [ INFO ] Duration: 15158.98 ms [ INFO ] Latency: - [ INFO ] Median: 121.03 ms - [ INFO ] Average: 124.70 ms - [ INFO ] Min: 65.35 ms - [ INFO ] Max: 268.30 ms - [ INFO ] Throughput: 47.92 FPS + [ INFO ] Median: 119.92 ms + [ INFO ] Average: 125.90 ms + [ INFO ] Min: 80.89 ms + [ INFO ] Max: 277.84 ms + [ INFO ] Throughput: 47.50 FPS Run Live Object Detection diff --git a/docs/notebooks/yolov9-optimization-with-output_files/yolov9-optimization-with-output_36_0.png b/docs/notebooks/yolov9-optimization-with-output_files/yolov9-optimization-with-output_36_0.png index 8d7867fc6e5b5a..d6b48379756722 100644 --- a/docs/notebooks/yolov9-optimization-with-output_files/yolov9-optimization-with-output_36_0.png +++ b/docs/notebooks/yolov9-optimization-with-output_files/yolov9-optimization-with-output_36_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:071b74de5f9e1088f4d187a6c3d339cb90758fe7d12d7b7de495fd5266e50946 -size 499096 +oid sha256:014be25aa9fb5ebd46a7f69afca67c33d8059694ca54bc2e32c0abfa460cb825 +size 496427 diff --git a/docs/openvino_sphinx_theme/README.md b/docs/openvino_sphinx_theme/README.md index 7931c481c308aa..2e82fa06e8c185 100644 --- a/docs/openvino_sphinx_theme/README.md +++ b/docs/openvino_sphinx_theme/README.md @@ -4,7 +4,8 @@ 1. Install the `openvino_sphinx_theme` using `python`: ``` -python setup.py install --user +cd openvino/docs/openvino_sphinx_theme +python -m pip install --user . ``` 2. 
Update the `html_theme` variable in your `conf.py`: diff --git a/docs/openvino_sphinx_theme/openvino_sphinx_theme/templates/layout.html b/docs/openvino_sphinx_theme/openvino_sphinx_theme/templates/layout.html index 25acb3c1e5cbda..a2ab53c6a57a83 100644 --- a/docs/openvino_sphinx_theme/openvino_sphinx_theme/templates/layout.html +++ b/docs/openvino_sphinx_theme/openvino_sphinx_theme/templates/layout.html @@ -28,7 +28,7 @@ {# The data-cfasync attribute disables CloudFlare's Rocket loader so that #} {# mode/theme are correctly set before the browser renders the page. #} {# https://github.com/pydata/pydata-sphinx-theme/pull/1045 #} - + - - - - + + - - - - {% endblock %} - {% block docs_navbar %} {{ super() }} {% include 'baner.html' %} diff --git a/docs/sphinx_setup/_templates/search.html b/docs/sphinx_setup/_templates/search.html index 3519f6e7e02f19..5430f24f74aa8c 100644 --- a/docs/sphinx_setup/_templates/search.html +++ b/docs/sphinx_setup/_templates/search.html @@ -2,133 +2,100 @@ {% set title = _('Search') %} {%- block content %} - {% block docs_navbar %} - {{ super() }} - {% include 'baner.html' %} - {% endblock %} +{% block docs_navbar %} +{{ super() }} +{% include 'baner.html' %} +{% endblock %} - {% block body %} - - - - - - - - - - - -
-  {% endblock %}
-
-  {%- block scripts_end %}
-  {{ _webpack.body_post() }}
-  {%- endblock %}
+{% block body %}
+{% endblock %} +{%- block scripts_end %} +{{ _webpack.body_post() }} {%- endblock %} + +{%- endblock %} \ No newline at end of file diff --git a/docs/sphinx_setup/conf.py b/docs/sphinx_setup/conf.py index def41af5943b3c..01c74de0175bcf 100644 --- a/docs/sphinx_setup/conf.py +++ b/docs/sphinx_setup/conf.py @@ -193,7 +193,6 @@ 'css/textfield.css', 'css/tabs.css', 'css/coveo_custom.css', - 'https://static.cloud.coveo.com/atomic/v2/themes/coveo.css', 'https://cdn.jsdelivr.net/npm/@splidejs/splide@4.1.4/dist/css/splide.min.css', ] diff --git a/samples/cpp/common/utils/src/args_helper.cpp b/samples/cpp/common/utils/src/args_helper.cpp index f4a3d10ceb0b5b..ba58f98e498e90 100644 --- a/samples/cpp/common/utils/src/args_helper.cpp +++ b/samples/cpp/common/utils/src/args_helper.cpp @@ -29,8 +29,7 @@ void readInputFilesArguments(std::vector& files, const std::string& arg) { struct stat sb; if (stat(arg.c_str(), &sb) != 0) { - slog::warn << "File " << arg << " cannot be opened!" << slog::endl; - return; + throw std::invalid_argument(arg + " file or directory not found."); } if (S_ISDIR(sb.st_mode)) { struct CloseDir { @@ -43,17 +42,20 @@ void readInputFilesArguments(std::vector& files, const std::string& using Dir = std::unique_ptr; Dir dp(opendir(arg.c_str())); if (dp == nullptr) { - slog::warn << "Directory " << arg << " cannot be opened!" << slog::endl; - return; + throw std::invalid_argument(arg + " directory cannot be opened!"); } struct dirent* ep; + size_t files_size = files.size(); while (nullptr != (ep = readdir(dp.get()))) { std::string fileName = ep->d_name; if (fileName == "." || fileName == "..") continue; files.push_back(arg + "/" + ep->d_name); } + if (files.size() == files_size) { + throw std::invalid_argument("No files were found in directory " + arg); + } } else { files.push_back(arg); } diff --git a/src/bindings/js/node/tests/e2e/demo-electron-app/index.js b/src/bindings/js/node/tests/e2e/demo-electron-app/index.js index cfa5fd27b0fa4e..58cc6b3b3cf450 100644 --- a/src/bindings/js/node/tests/e2e/demo-electron-app/index.js +++ b/src/bindings/js/node/tests/e2e/demo-electron-app/index.js @@ -1,11 +1,39 @@ const { app } = require('electron'); const { addon: ov } = require('openvino-node'); -app.whenReady().then(() => { - console.log('Creating OpenVINO Runtime Core'); - // eslint-disable-next-line @typescript-eslint/no-unused-vars - const core = new ov.Core(); - console.log('Created OpenVINO Runtime Core'); +const epsilon = 0.5; // To avoid very small numbers +const pathToModel = '../tests/unit/test_models/test_model_fp32.xml'; + +main(); + +async function main() { + await app.whenReady(); + + try { + console.log('Creating OpenVINO Runtime Core'); + // eslint-disable-next-line @typescript-eslint/no-unused-vars + const core = new ov.Core(); + console.log('Created OpenVINO Runtime Core'); + + const model = await core.readModel(pathToModel); + console.log('Model read successfully:', model); + const compiledModel = await core.compileModel(model, 'CPU'); + const inferRequest = compiledModel.createInferRequest(); + console.log('Infer request created:', inferRequest); + + const tensorData = Float32Array.from( + { length: 3072 }, + () => Math.random() + epsilon, + ); + const tensor = new ov.Tensor(ov.element.f32, [1, 3, 32, 32], tensorData); + console.log('Tensor created:', tensor); + + const result = await inferRequest.inferAsync([tensor]); + console.log('Infer request result:', result); + } catch (error) { + console.error('Error:', error); + app.exit(1); + } app.exit(0); -}); +} diff --git 
a/src/bindings/js/node/tests/e2e/electron-app.test.js b/src/bindings/js/node/tests/e2e/electron-app.test.js index 01e84dea884502..98982a5f941263 100644 --- a/src/bindings/js/node/tests/e2e/electron-app.test.js +++ b/src/bindings/js/node/tests/e2e/electron-app.test.js @@ -1,24 +1,17 @@ /* global describe, it, before, after */ const fs = require('node:fs'); +const util = require('node:util'); const assert = require('node:assert'); const { exec } = require('child_process'); +const execPromise = util.promisify(exec); +const { testModels, downloadTestModel } = require('../unit/utils.js'); describe('E2E testing for OpenVINO as an Electron dependency.', function() { this.timeout(50000); - before((done) => { - exec( - 'cp -r ./tests/e2e/demo-electron-app/ demo-electron-app-project', - (error) => { - if (error) { - console.error(`exec error: ${error}`); - - return done(error); - } - - done(); - }, - ); + before(async () => { + await downloadTestModel(testModels.testModelFP32); + await execPromise('cp -r ./tests/e2e/demo-electron-app/ demo-electron-app-project'); }); it('should install dependencies', (done) => { @@ -37,7 +30,7 @@ describe('E2E testing for OpenVINO as an Electron dependency.', function() { }); it('should run electron package and verify output', (done) => { - exec('cd demo-electron-app-project && npm start', (error, stdout) => { + exec(`cd demo-electron-app-project && npm start`, (error, stdout) => { if (error) { console.error(`exec error: ${error}`); @@ -48,6 +41,14 @@ describe('E2E testing for OpenVINO as an Electron dependency.', function() { stdout.includes('Created OpenVINO Runtime Core'), 'Check that openvino-node operates fine', ); + assert( + stdout.includes('Model read successfully: ModelWrap {}'), + 'Check that model is read successfully', + ); + assert( + stdout.includes('Infer request result: { fc_out: TensorWrap {} }'), + 'Check that infer request result is successful', + ); done(); }); }); diff --git a/src/bindings/python/constraints.txt b/src/bindings/python/constraints.txt index b3a8267e4c1f14..a0fbf982105ad6 100644 --- a/src/bindings/python/constraints.txt +++ b/src/bindings/python/constraints.txt @@ -1,5 +1,5 @@ # used in multiple components -numpy>=1.16.6,<2.1.0 # Python bindings, frontends +numpy>=1.16.6,<2.2.0 # Python bindings, frontends # pytest pytest>=5.0,<8.4 @@ -8,7 +8,7 @@ pytest-html==4.1.1 pytest-timeout==2.3.1 # Python bindings -py>=1.9.0 +build<1.3 pygments>=2.8.1 setuptools>=65.6.1,<75.3.0 sympy>=1.10 @@ -18,8 +18,8 @@ patchelf<=0.17.2.1 # Frontends h5py>=3.1.0,<3.13.0 docopt~=0.6.2 -paddlepaddle==2.6.0 +paddlepaddle==2.6.2 tensorflow>=1.15.5,<2.18.0 six~=1.16.0 protobuf>=3.18.1,<4.0.0 -onnx==1.16.0 +onnx==1.17.0 diff --git a/src/bindings/python/docs/build.md b/src/bindings/python/docs/build.md index d0ab2e5f5f4e57..f824d9ccb8d82a 100644 --- a/src/bindings/python/docs/build.md +++ b/src/bindings/python/docs/build.md @@ -1,9 +1,18 @@ # Building the OpenVINO™ Python API **Refer to ["How to build OpenVINO" in OpenVINO™ developer documentation](../../../../docs/dev/build.md) for general building instructions.** - For each platform, you can build and install the API as a part of OpenVINO™ Toolkit or as a Python wheel. -A Python wheel is a portable package that allows you to install OpenVINO™ in either your Python distribution or a dedicated virtual environment. + +## Using Python Wheels for OpenVINO™ +Wheels are portable Python packages that are ready to install upon download. 
They are the commonly used binary distributions in Python as they avoid the compiling of extension modules and associated dependency issues on the user end. As a result, wheels installation is faster and smoother, simplifying the process for both developers and users. The ```.whl``` format is the default preference for ```pip``` when you run a ```pip install``` command. + +OpenVINO wheels for various platforms are available on [PyPI](https://pypi.org/project/openvino/#files), enabling you to install OpenVINO™ in your Python distribution or a dedicated virtual environment. + +OpenVINO builds two different wheels with separate ```setup.py``` files for [```openvino```](../wheel/setup.py) and [```openvino-dev```](../../../../tools/openvino_dev/setup.py). To build the wheels while building the project from source, your ```cmake``` command should include ```-DENABLE_PYTHON=ON``` and ```-DENABLE_WHEEL=ON```. Once built, the wheels can be found under ```openvino_install_dir/tools```. + +While wheels make installation easier, using wheels for development offers less customization than building from source and exporting ```PYTHONPATH``` and other environment variables to OpenVINO directories. Wheels are usually provided for specific packaged versions and might not contain the most recent changes that are available if you choose to clone the repository and build it yourself. + +To learn more about wheels and their use cases, check out the article [What Are Python Wheels and Why Should You Care?](https://realpython.com/python-wheels/). ## Virtual environments @@ -38,7 +47,9 @@ OpenVINO can be built based on specific virtual environments such as [venv](http 5. Install developer requirements for OpenVINO™ Python API while inside virtual environment: ```shell - cd + git clone https://github.com/openvinotoolkit/openvino.git + cd openvino + git submodule update --init --recursive pip install -r src/bindings/python/requirements.txt pip install -r src/bindings/python/requirements_test.txt ``` diff --git a/src/bindings/python/requirements.txt b/src/bindings/python/requirements.txt index e311c6ed6438db..a2d63161fe764c 100644 --- a/src/bindings/python/requirements.txt +++ b/src/bindings/python/requirements.txt @@ -1,3 +1,3 @@ -numpy>=1.16.6,<2.1.0 +numpy>=1.16.6,<2.2.0 openvino-telemetry>=2023.2.1 packaging diff --git a/src/bindings/python/requirements_test.txt b/src/bindings/python/requirements_test.txt index 9d16cbe5c9b21a..1aa2ff24b1b948 100644 --- a/src/bindings/python/requirements_test.txt +++ b/src/bindings/python/requirements_test.txt @@ -7,7 +7,7 @@ flake8-annotations-complexity<=0.0.8 flake8-broken-line<=1.0.0 flake8-bugbear<=24.8.19 flake8-class-attributes-order<=0.1.3 -flake8-comprehensions<=3.15.0 +flake8-comprehensions<=3.16.0 flake8-debugger<=4.1.2 flake8-docstrings<=1.7.0 flake8-eradicate<=1.5.0 @@ -30,7 +30,6 @@ pytest-forked; sys_platform != 'win32' pytest-xdist pytest-html pytest -py radon retrying tox diff --git a/src/bindings/python/src/openvino/frontend/pytorch/patch_model.py b/src/bindings/python/src/openvino/frontend/pytorch/patch_model.py index 908a7f8660a94c..fb8f70e2a566bc 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/patch_model.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/patch_model.py @@ -21,7 +21,7 @@ def __exit__(self, *args): self.state = None -def patch_model(model, module_extensions, orig_forward_name): +def patch_model(model, module_extensions, orig_forward_name, use_meta=False): def module_patcher(m, name): extension = None if m in 
module_extensions: @@ -32,13 +32,14 @@ def module_patcher(m, name): extension = module_extensions[name] if extension: + log.debug("Patching module %s", m) # The Trampoline class is instantiated for every module replacement, so we can use class members individually for each module. class Trampoline(torch.autograd.Function): target_extension = extension original_module = m - stashed_args = None - stashed_kwargs = None + stashed_args = tuple() + stashed_kwargs = {} @staticmethod @torch.jit.ignore @@ -53,26 +54,34 @@ def forward(*args, **kwargs): # set original forward for the module m.forward = getattr(m, orig_forward_name) # call user code - results = extension.evaluate( - m, *Trampoline.stashed_args, **Trampoline.stashed_kwargs) # call user code + results = extension.evaluate(m, *Trampoline.stashed_args, + **Trampoline.stashed_kwargs) m.forward = patched_forward # return patched forward back return results def new_forward(*args, **kwargs): - Trampoline.stashed_args = args - Trampoline.stashed_kwargs = kwargs + # use meta device to store args, to save memory + if use_meta: + d = torch.device("meta") + Trampoline.stashed_args = tuple(a.to(d) for a in args) + Trampoline.stashed_kwargs = dict((k, v.to(d)) for k, v in kwargs.items()) + else: + Trampoline.stashed_args = args + Trampoline.stashed_kwargs = kwargs return extension.convert(m, Trampoline.apply, *args, **kwargs) + setattr(m, orig_forward_name, m.forward) m.forward = new_forward for name, m in model.named_modules(): if hasattr(m, orig_forward_name): - # already patched, skipping with a warning because it is unexpected - log.warning("Unexpectedly found already patched module %s while applying " - "ModuleExtension during PyTorch model conversion. " - "Result of the conversion maybe broken. Depending on the exact issue " - "it may lead to broken original model.", name) + # already patched, skipping. It may happen when patching is applied to the same module twice + log.debug("Unexpectedly found already patched module %s while applying " + "ModuleExtension during PyTorch model conversion. " + "Result of the conversion may be broken. 
Depending on the exact issue " + "it may lead to broken original model.", name) continue + module_patcher(m, name) @@ -99,12 +108,18 @@ def __make_16bit_traceable(model: torch.nn.Module): torch.nn.Linear, "ov_ext::linear", evaluate=lambda module, *args, **kwargs: torch.full( list(args[0].shape[:-1]) + [module.out_features], 0.5, dtype=torch.float32), - convert=lambda module, target_op, *args, **kwargs: target_op(args[0], module.weight, module.bias)), + convert=lambda module, target_op, *args, **kwargs: target_op(args[0], + module.weight, + module.bias)), torch.nn.Embedding: ModuleExtension( torch.nn.Embedding, "ov_ext::embedding", evaluate=lambda module, *args, **kwargs: torch.full( list(args[0].shape) + [module.embedding_dim], 0.5, dtype=torch.float32), - convert=lambda module, target_op, *args, **kwargs: target_op(module.weight, args[0], module.padding_idx, module.scale_grad_by_freq, module.sparse)), + convert=lambda module, target_op, *args, **kwargs: target_op(module.weight, + args[0], + module.padding_idx, + module.scale_grad_by_freq, + module.sparse)), } try: from transformers.pytorch_utils import Conv1D @@ -112,12 +127,15 @@ def __make_16bit_traceable(model: torch.nn.Module): Conv1D, "ov_ext::conv1d", evaluate=lambda module, *args, **kwargs: torch.full( list(args[0].shape[:-1]) + [module.nf], 0.5, dtype=torch.float32), - convert=lambda module, target_op, *args, **kwargs: target_op(args[0], module.weight, module.bias)) + convert=lambda module, target_op, *args, **kwargs: target_op(args[0], + module.weight, + module.bias)) except: pass patch_model(model, extensions, - "_openvino_module_extension_patch_orig_forward") + "_openvino_module_extension_patch_orig_forward", use_meta=True) for _, module in model.named_modules(): - if module.__class__ not in extensions and (any([p.dtype in [torch.float16, torch.bfloat16] for p in module.parameters(False)]) - or any([b.dtype in [torch.float16, torch.bfloat16] for b in module.buffers(False)])): + if module.__class__ not in extensions and (any(p.dtype in [torch.float16, torch.bfloat16] for p in module.parameters(False)) + or any(b.dtype in [torch.float16, torch.bfloat16] for b in module.buffers(False))): + log.debug("Casting module %s to float32", module) module.float() diff --git a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/backend.py b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/backend.py index c6c01fa98e5e99..9f2ef019769875 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/backend.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/backend.py @@ -49,18 +49,22 @@ openvino_options = {} -@register_backend +# Disable regional compilation which was enabled by default from Torch 2.5.0 +if hasattr(torch._dynamo.config, "inline_inbuilt_nn_modules"): + torch._dynamo.config.inline_inbuilt_nn_modules=False + @fake_tensor_unsupported def openvino(subgraph, example_inputs, options=None): - if (_get_aot_autograd(options)): + if _get_aot_autograd(options): global openvino_options openvino_options = options decompositions = _get_decompositions(options) + get_inf_decomposition_list() + get_aot_decomposition_list() - return aot_autograd(fw_compiler=fx_openvino, - bw_compiler=fx_openvino, - decompositions=get_decompositions(decompositions))(subgraph, example_inputs) + return aot_autograd(fw_compiler=fx_openvino, bw_compiler=fx_openvino, decompositions=get_decompositions(decompositions))(subgraph, example_inputs) return fx_openvino(subgraph, example_inputs, options) +if "openvino" 
not in torch.compiler.list_backends(): + register_backend(compiler_fn=openvino, name="openvino") + def fx_openvino(subgraph, example_inputs, options=None): try: if len(openvino_options) != 0: @@ -70,7 +74,7 @@ def fx_openvino(subgraph, example_inputs, options=None): openvino_model_caching = _get_model_caching(options) if openvino_model_caching is not None and openvino_model_caching: # Create a hash to be used for caching - model_hash_str = sha256(subgraph.code.encode('utf-8')).hexdigest() + model_hash_str = sha256(subgraph.code.encode("utf-8")).hexdigest() executor_parameters = {"model_hash_str": model_hash_str} # Check if the model was fully supported and already cached example_inputs.reverse() @@ -79,15 +83,17 @@ def fx_openvino(subgraph, example_inputs, options=None): if os.path.isfile(maybe_fs_cached_name + ".xml") and os.path.isfile(maybe_fs_cached_name + ".bin"): # Model is fully supported and already cached. Run the cached OV model directly. compiled_model = openvino_compile_cached_model(maybe_fs_cached_name, options, *example_inputs) + def _call(*args): res = execute_cached(compiled_model, *args) return res + return _call if inputs_reversed: example_inputs.reverse() preserved_arg_indices = [] - if (_get_aot_autograd(options)): + if _get_aot_autograd(options): if tracing_context := torch._guards.TracingContext.try_get(): fw_metadata = tracing_context.fw_metadata params_flat = tracing_context.params_flat @@ -97,6 +103,7 @@ def _call(*args): model = subgraph else: from torch._subclasses.fake_tensor import FakeTensorMode + decompositions = _get_decompositions(options) + get_inf_decomposition_list() with FakeTensorMode(allow_non_fake_inputs=True): model = make_fx(subgraph, decomposition_table=get_decompositions(decompositions))(*example_inputs) @@ -106,26 +113,27 @@ def _call(*args): partitioner = Partitioner(options) compiled_model = partitioner.make_partitions(model, options) - if executor_parameters is not None and 'model_hash_str' in executor_parameters: + if executor_parameters is not None and "model_hash_str" in executor_parameters: # Check if the model is fully supported. 
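            # Note: this branch runs only when model caching is enabled in the torch.compile
            # options (executor_parameters is populated above solely in that case). A hedged,
            # illustrative sketch of the typical entry point, using the "openvino" backend
            # registered earlier in this module (`model` and `inputs` are placeholders):
            #   compiled = torch.compile(model, backend="openvino")
            #   outputs = compiled(*inputs)
            # If the partitioner result below is fully supported, the cache entry is tagged with
            # the "_fs" suffix so a later run can execute the cached model directly.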
fully_supported = partitioner.check_fully_supported(compiled_model) if fully_supported: executor_parameters["model_hash_str"] += "_fs" def _call(*args): - if(_get_aot_autograd(options)): + if _get_aot_autograd(options): args_list = args[0] args_new = [args_list[i] for i in preserved_arg_indices] args = args_new - res = execute(compiled_model, *args, executor="openvino", - executor_parameters=executor_parameters, options=options) + res = execute(compiled_model, *args, executor="openvino", executor_parameters=executor_parameters, options=options) return res - if(_get_aot_autograd(options)): - _call._boxed_call = True # type: ignore[attr-defined] + + if _get_aot_autograd(options): + _call._boxed_call = True # type: ignore[attr-defined] return _call except Exception as e: logger.debug(f"Failed in OpenVINO execution: {e}") return compile_fx(subgraph, example_inputs) + def reset(): clear_caches() diff --git a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/partition.py b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/partition.py index a99fdb4ebc6d45..bb272b4f9adb53 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/partition.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/partition.py @@ -114,9 +114,28 @@ def capture_gptq_patterns(self, graph_module: GraphModule): for pattern_op in enabled_ops: self.supported_ops.enable_by_name(pattern_op) + def capture_nncf_patterns(self, graph_module: GraphModule): + const_node = PatternNode + const_node.op_types["get_attr"] = None + bitwise_right_shift_node = PatternNode + bitwise_right_shift_node.op_types["call_function:aten.bitwise_right_shift.Tensor_Scalar"] = [const_node] + bitwise_and_node = PatternNode + bitwise_and_node.op_types["call_function:aten.bitwise_and.Scalar"] = [const_node,] + stack_node = PatternNode + stack_node.op_types["call_function:aten.stack.default"] = [bitwise_and_node, bitwise_right_shift_node] + + for node in graph_module.graph.nodes: + if str(node.op) == "call_function" and str(node.target) == "aten.stack.default": + enabled_ops = [] + pattern_match = self.check_pattern(node, bitwise_and_node, enabled_ops) + if pattern_match: + for pattern_op in enabled_ops: + self.supported_ops.enable_by_name(pattern_op) + def make_partitions(self, graph_module: GraphModule, options) -> GraphModule: allow_single_node_partition = _is_testing(options) self.capture_gptq_patterns(graph_module) + self.capture_nncf_patterns(graph_module) partitioner = CapabilityBasedPartitioner( graph_module, self.supported_ops, allows_single_node_partition=allow_single_node_partition) partitions = partitioner.propose_partitions() diff --git a/src/bindings/python/src/openvino/frontend/pytorch/utils.py b/src/bindings/python/src/openvino/frontend/pytorch/utils.py index 06a58eb80ffed1..826d766505fa79 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/utils.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/utils.py @@ -46,6 +46,15 @@ def get_type_from_py_type(value): def torch_tensor_to_ov_const(torch_t: torch.Tensor, shared_memory=True): + is_fake_tensor = False + try: + from torch._prims import FakeTensor + is_fake_tensor = isinstance(torch_t, FakeTensor) + except: + pass + assert not is_fake_tensor, '`FakeTensor` is found in the graph during conversion. ' \ + 'In order to avoid `FakeTensor` in the traced model, ' \ + 'try to infer the model before exporting.' 
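    # A FakeTensor (torch._subclasses.fake_tensor) carries shape and dtype metadata but no real
    # storage, so its values cannot be copied into an OpenVINO constant; the assert above fails
    # fast with a hint instead of silently producing a bogus weight. A minimal, hedged sketch of
    # how such tensors typically appear, using the same FakeTensorMode API the torchdynamo
    # backend in this frontend relies on:
    #   from torch._subclasses.fake_tensor import FakeTensorMode
    #   with FakeTensorMode():
    #       t = torch.empty(2, 2)  # t is a FakeTensor: metadata only, no data to convert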
torch_t = torch_t.contiguous() if torch_t.dtype == torch.bfloat16: # reinterpret bfloat16 data as float16 to allow conversion to numpy diff --git a/src/bindings/python/src/openvino/runtime/opset13/ops.py b/src/bindings/python/src/openvino/runtime/opset13/ops.py index cb201d3d4263dd..a624ffb4f79873 100644 --- a/src/bindings/python/src/openvino/runtime/opset13/ops.py +++ b/src/bindings/python/src/openvino/runtime/opset13/ops.py @@ -15,7 +15,7 @@ from openvino.runtime.op import Constant, Result from openvino.runtime.opset1 import convert_like from openvino.runtime.opset_utils import _get_node_factory -from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op +from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op, overloading from openvino.runtime.utils.types import ( NumericData, NodeInput, @@ -271,7 +271,7 @@ def scaled_dot_product_attention( return _get_node_factory_opset13().create("ScaledDotProductAttention", inputs, attributes) -@singledispatch +@overloading(Union[NumericData, np.number, bool, np.bool_, list], Union[NumericType, Type], Optional[str], bool) # type: ignore @nameable_op def constant( value: Union[NumericData, np.number, bool, np.bool_, list], @@ -339,9 +339,9 @@ def display_shared_memory_warning(warning_message: str) -> None: return Constant(_value, shared_memory=_shared_memory) -@constant.register +@overloading(Tensor, bool, Optional[str]) # type: ignore @nameable_op -def _( +def constant( # noqa: F811 tensor: Tensor, shared_memory: bool = False, name: Optional[str] = None, diff --git a/src/bindings/python/src/openvino/runtime/opset15/__init__.py b/src/bindings/python/src/openvino/runtime/opset15/__init__.py index 58fd90e7fd1051..c4dd48d9087ae1 100644 --- a/src/bindings/python/src/openvino/runtime/opset15/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset15/__init__.py @@ -2,10 +2,7 @@ # Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -# Inlcudes new operators added in Opset15 - -# TODO (ticket 138273): Add previous opset operators at the end of opset15 development -from openvino.runtime.opset1.ops import parameter +# New operations added in Opset15 from openvino.runtime.opset15.ops import col2im from openvino.runtime.opset15.ops import embedding_bag_offsets from openvino.runtime.opset15.ops import embedding_bag_packed @@ -16,4 +13,192 @@ from openvino.runtime.opset15.ops import bitwise_left_shift from openvino.runtime.opset15.ops import bitwise_right_shift from openvino.runtime.opset15.ops import slice_scatter + +# Operators from previous opsets +from openvino.runtime.opset1.ops import absolute +from openvino.runtime.opset1.ops import absolute as abs +from openvino.runtime.opset1.ops import acos +from openvino.runtime.opset4.ops import acosh +from openvino.runtime.opset8.ops import adaptive_avg_pool +from openvino.runtime.opset8.ops import adaptive_max_pool +from openvino.runtime.opset1.ops import add +from openvino.runtime.opset1.ops import asin +from openvino.runtime.opset4.ops import asinh +from openvino.runtime.opset6.ops import assign +from openvino.runtime.opset1.ops import atan +from openvino.runtime.opset4.ops import atanh +from openvino.runtime.opset14.ops import avg_pool +from openvino.runtime.opset5.ops import batch_norm_inference +from openvino.runtime.opset2.ops import batch_to_space +from openvino.runtime.opset1.ops import binary_convolution +from openvino.runtime.opset13.ops import bitwise_and +from openvino.runtime.opset13.ops import bitwise_not +from 
openvino.runtime.opset13.ops import bitwise_or +from openvino.runtime.opset13.ops import bitwise_xor +from openvino.runtime.opset3.ops import broadcast +from openvino.runtime.opset3.ops import bucketize +from openvino.runtime.opset1.ops import ceiling +from openvino.runtime.opset1.ops import ceiling as ceil +from openvino.runtime.opset1.ops import clamp +from openvino.runtime.opset1.ops import concat +from openvino.runtime.opset13.ops import constant +from openvino.runtime.opset1.ops import convert +from openvino.runtime.opset1.ops import convert_like +from openvino.runtime.opset14.ops import convert_promote_types +from openvino.runtime.opset1.ops import convolution +from openvino.runtime.opset1.ops import convolution_backprop_data +from openvino.runtime.opset1.ops import cos +from openvino.runtime.opset1.ops import cosh +from openvino.runtime.opset1.ops import ctc_greedy_decoder +from openvino.runtime.opset6.ops import ctc_greedy_decoder_seq_len +from openvino.runtime.opset4.ops import ctc_loss +from openvino.runtime.opset3.ops import cum_sum +from openvino.runtime.opset3.ops import cum_sum as cumsum +from openvino.runtime.opset8.ops import deformable_convolution +from openvino.runtime.opset1.ops import deformable_psroi_pooling +from openvino.runtime.opset1.ops import depth_to_space +from openvino.runtime.opset8.ops import detection_output +from openvino.runtime.opset7.ops import dft +from openvino.runtime.opset1.ops import divide +from openvino.runtime.opset7.ops import einsum +from openvino.runtime.opset1.ops import elu +from openvino.runtime.opset3.ops import embedding_bag_offsets_sum +from openvino.runtime.opset3.ops import embedding_bag_packed_sum +from openvino.runtime.opset3.ops import embedding_segments_sum +from openvino.runtime.opset3.ops import extract_image_patches +from openvino.runtime.opset1.ops import equal +from openvino.runtime.opset1.ops import erf +from openvino.runtime.opset1.ops import exp +from openvino.runtime.opset9.ops import eye +from openvino.runtime.opset13.ops import fake_convert +from openvino.runtime.opset13.ops import fake_quantize +from openvino.runtime.opset1.ops import floor +from openvino.runtime.opset1.ops import floor_mod +from openvino.runtime.opset8.ops import gather +from openvino.runtime.opset6.ops import gather_elements +from openvino.runtime.opset8.ops import gather_nd +from openvino.runtime.opset1.ops import gather_tree +from openvino.runtime.opset7.ops import gelu +from openvino.runtime.opset9.ops import generate_proposals +from openvino.runtime.opset1.ops import greater +from openvino.runtime.opset1.ops import greater_equal +from openvino.runtime.opset9.ops import grid_sample +from openvino.runtime.opset1.ops import grn +from openvino.runtime.opset1.ops import group_convolution +from openvino.runtime.opset1.ops import group_convolution_backprop_data +from openvino.runtime.opset12.ops import group_normalization +from openvino.runtime.opset3.ops import gru_cell +from openvino.runtime.opset5.ops import gru_sequence +from openvino.runtime.opset1.ops import hard_sigmoid +from openvino.runtime.opset5.ops import hsigmoid +from openvino.runtime.opset4.ops import hswish +from openvino.runtime.opset7.ops import idft +from openvino.runtime.opset8.ops import if_op +from openvino.runtime.opset11.ops import interpolate +from openvino.runtime.opset14.ops import inverse +from openvino.runtime.opset9.ops import irdft +from openvino.runtime.opset10.ops import is_finite +from openvino.runtime.opset10.ops import is_inf +from openvino.runtime.opset10.ops import 
is_nan +from openvino.runtime.opset8.ops import i420_to_bgr +from openvino.runtime.opset8.ops import i420_to_rgb +from openvino.runtime.opset1.ops import less +from openvino.runtime.opset1.ops import less_equal +from openvino.runtime.opset1.ops import log +from openvino.runtime.opset1.ops import logical_and +from openvino.runtime.opset1.ops import logical_not +from openvino.runtime.opset1.ops import logical_or +from openvino.runtime.opset1.ops import logical_xor +from openvino.runtime.opset5.ops import log_softmax +from openvino.runtime.opset5.ops import loop +from openvino.runtime.opset1.ops import lrn +from openvino.runtime.opset4.ops import lstm_cell +from openvino.runtime.opset5.ops import lstm_sequence +from openvino.runtime.opset1.ops import matmul +from openvino.runtime.opset8.ops import matrix_nms +from openvino.runtime.opset14.ops import max_pool +from openvino.runtime.opset1.ops import maximum +from openvino.runtime.opset1.ops import minimum +from openvino.runtime.opset4.ops import mish +from openvino.runtime.opset1.ops import mod +from openvino.runtime.opset9.ops import multiclass_nms +from openvino.runtime.opset13.ops import multinomial +from openvino.runtime.opset1.ops import multiply +from openvino.runtime.opset6.ops import mvn +from openvino.runtime.opset1.ops import negative +from openvino.runtime.opset13.ops import nms_rotated +from openvino.runtime.opset9.ops import non_max_suppression +from openvino.runtime.opset3.ops import non_zero +from openvino.runtime.opset1.ops import normalize_l2 +from openvino.runtime.opset1.ops import not_equal +from openvino.runtime.opset8.ops import nv12_to_bgr +from openvino.runtime.opset8.ops import nv12_to_rgb +from openvino.runtime.opset1.ops import one_hot +from openvino.runtime.opset12.ops import pad +from openvino.runtime.opset1.ops import parameter +from openvino.runtime.opset1.ops import power +from openvino.runtime.opset1.ops import prelu +from openvino.runtime.opset8.ops import prior_box +from openvino.runtime.opset1.ops import prior_box_clustered +from openvino.runtime.opset1.ops import psroi_pooling +from openvino.runtime.opset4.ops import proposal +from openvino.runtime.opset4.ops import range +from openvino.runtime.opset8.ops import random_uniform +from openvino.runtime.opset9.ops import rdft +from openvino.runtime.opset6.ops import read_value +from openvino.runtime.opset4.ops import reduce_l1 +from openvino.runtime.opset4.ops import reduce_l2 +from openvino.runtime.opset1.ops import reduce_logical_and +from openvino.runtime.opset1.ops import reduce_logical_or +from openvino.runtime.opset1.ops import reduce_max +from openvino.runtime.opset1.ops import reduce_mean +from openvino.runtime.opset1.ops import reduce_min +from openvino.runtime.opset1.ops import reduce_prod +from openvino.runtime.opset1.ops import reduce_sum +from openvino.runtime.opset1.ops import region_yolo +from openvino.runtime.opset2.ops import reorg_yolo +from openvino.runtime.opset1.ops import relu +from openvino.runtime.opset1.ops import reshape +from openvino.runtime.opset13.ops import result +from openvino.runtime.opset1.ops import reverse_sequence +from openvino.runtime.opset3.ops import rnn_cell +from openvino.runtime.opset5.ops import rnn_sequence +from openvino.runtime.opset9.ops import roi_align +from openvino.runtime.opset2.ops import roi_pooling +from openvino.runtime.opset7.ops import roll +from openvino.runtime.opset5.ops import round +from openvino.runtime.opset13.ops import scaled_dot_product_attention +from openvino.runtime.opset12.ops import 
scatter_elements_update +from openvino.runtime.opset3.ops import scatter_update +from openvino.runtime.opset15.ops import search_sorted +from openvino.runtime.opset1.ops import select +from openvino.runtime.opset1.ops import selu +from openvino.runtime.opset3.ops import shape_of +from openvino.runtime.opset3.ops import shuffle_channels +from openvino.runtime.opset1.ops import sigmoid +from openvino.runtime.opset1.ops import sign +from openvino.runtime.opset1.ops import sin +from openvino.runtime.opset1.ops import sinh +from openvino.runtime.opset8.ops import slice +from openvino.runtime.opset8.ops import softmax +from openvino.runtime.opset4.ops import softplus +from openvino.runtime.opset9.ops import softsign +from openvino.runtime.opset2.ops import space_to_batch +from openvino.runtime.opset1.ops import space_to_depth +from openvino.runtime.opset1.ops import split +from openvino.runtime.opset1.ops import sqrt +from openvino.runtime.opset1.ops import squared_difference +from openvino.runtime.opset15.ops import squeeze from openvino.runtime.opset15.ops import stft +from openvino.runtime.opset1.ops import strided_slice +from openvino.runtime.opset1.ops import subtract +from openvino.runtime.opset4.ops import swish +from openvino.runtime.opset1.ops import tan +from openvino.runtime.opset1.ops import tanh +from openvino.runtime.opset1.ops import tensor_iterator +from openvino.runtime.opset1.ops import tile +from openvino.runtime.opset11.ops import topk +from openvino.runtime.opset1.ops import transpose +from openvino.runtime.opset10.ops import unique +from openvino.runtime.opset1.ops import unsqueeze +from openvino.runtime.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset15/ops.py b/src/bindings/python/src/openvino/runtime/opset15/ops.py index c278120dab7432..93aacb29572340 100644 --- a/src/bindings/python/src/openvino/runtime/opset15/ops.py +++ b/src/bindings/python/src/openvino/runtime/opset15/ops.py @@ -326,4 +326,64 @@ def stft( :return: The new node performing STFT operation. """ inputs = as_nodes(data, window, frame_size, frame_step, name=name) - return _get_node_factory_opset15().create("STFT", inputs) + return _get_node_factory_opset15().create("STFT", inputs, {"transpose_frames": transpose_frames}) + + +@nameable_op +def search_sorted( + sorted_sequence: NodeInput, + values: NodeInput, + right_mode: bool = False, + name: Optional[str] = None, +) -> Node: + """Return a node which generates SearchSorted operation. + + :param sorted_sequence: The node providing sorted sequence to search in. + :param values: The node providing searched values. + :param right_mode: If set to False, return the first suitable index that is found for given value. + If set to True, return the last such index. Defaults to False. + :param name: The optional name for the created output node. + :return: The new node performing SearchSorted operation. + """ + inputs = as_nodes(sorted_sequence, values, name=name) + attributes = {"right_mode": right_mode} + return _get_node_factory_opset15().create("SearchSorted", inputs, attributes) + + +@nameable_op +def squeeze( + data: NodeInput, + axes: Optional[NodeInput] = None, + allow_axis_skip: bool = False, + name: Optional[str] = None, +) -> Node: + """Perform squeeze operation on input tensor. + + :param data: The node with data tensor. + :param axes: Optional list of integers, indicating the dimensions to squeeze. + Negative indices are supported. One of: input node or array. 
+ :param allow_axis_skip: If true, shape inference results in a dynamic rank, when + selected axis has value 1 in its dynamic range. Used only if axes input + is given. Defaults to false. + :param name: Optional new name for output node. + :return: The new node performing a squeeze operation on input tensor. + + Remove single-dimensional entries from the shape of a tensor. + Takes an optional parameter `axes` with a list of axes to squeeze. + If `axes` is not provided, all the single dimensions will be removed from the shape. + + For example: + + Inputs: tensor with shape [1, 2, 1, 3, 1, 1], axes=[2, 4] + + Result: tensor with shape [1, 2, 3, 1] + """ + if axes is None: + inputs = as_nodes(data, name=name) + else: + inputs = as_nodes(data, axes, name=name) + return _get_node_factory_opset15().create( + "Squeeze", + inputs, + {"allow_axis_skip": allow_axis_skip} + ) diff --git a/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py b/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py index dd90ded374ca11..d1dce289d09941 100644 --- a/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py +++ b/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py @@ -23,3 +23,4 @@ from openvino._pyopenvino.properties.hint import allow_auto_batching from openvino._pyopenvino.properties.hint import dynamic_quantization_group_size from openvino._pyopenvino.properties.hint import kv_cache_precision +from openvino._pyopenvino.properties.hint import activations_scale_factor diff --git a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp index a6b30bd773001f..564e5f69f5ee14 100644 --- a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp +++ b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp @@ -101,6 +101,7 @@ void regmodule_properties(py::module m) { wrap_property_RW(m_hint, ov::hint::allow_auto_batching, "allow_auto_batching"); wrap_property_RW(m_hint, ov::hint::dynamic_quantization_group_size, "dynamic_quantization_group_size"); wrap_property_RW(m_hint, ov::hint::kv_cache_precision, "kv_cache_precision"); + wrap_property_RW(m_hint, ov::hint::activations_scale_factor, "activations_scale_factor"); // Submodule intel_cpu py::module m_intel_cpu = diff --git a/src/bindings/python/tests/test_graph/test_constant.py b/src/bindings/python/tests/test_graph/test_constant.py index e28a4ad05510f2..131654855b380a 100644 --- a/src/bindings/python/tests/test_graph/test_constant.py +++ b/src/bindings/python/tests/test_graph/test_constant.py @@ -87,7 +87,7 @@ def test_init_with_array(src_dtype, dst_dtype, shared_flag, data_getter): data = np.ascontiguousarray(data) # Create constant from based on numpy dtype or openvino type - ov_const = ops.constant(data, dtype=dst_dtype, shared_memory=shared_flag) + ov_const = ops.constant(data, dst_dtype, shared_memory=shared_flag) # Check shape and element type of Constant class assert isinstance(ov_const, Constant) @@ -842,7 +842,7 @@ def test_get_data_casting_bf16(src_dtype, dst_dtype, copy_flag): ) def test_get_data_casting_packed(src_dtype, ov_type, dst_dtype, copy_flag): data = np.array([[0, 0, 0, 0, 1, 0, 0, 1], [0, 0, 0, 0, 0, 0, 0, 1]], dtype=src_dtype) - ov_const = ops.constant(data, dtype=ov_type) + ov_const = ops.constant(value=data, dtype=ov_type) arr = ov_const.get_data(dtype=dst_dtype, copy=copy_flag) if dst_dtype is None: @@ -867,7 +867,7 @@ def test_const_from_tensor(shared_flag): shape = [1, 3, 32, 32] 
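    # With the @overloading-based dispatch added in opset13.ops, `constant` resolves by argument
    # types: a numpy array or scalar value (optionally with a dtype) selects the value overload,
    # while a Tensor selects the Tensor overload exercised in this test. A hedged sketch, assuming
    # the same `ops`, `Tensor` and `np` imports already used by this test module:
    #   c_value = ops.constant(np.zeros((2, 2), dtype=np.float32))            # value overload
    #   c_tensor = ops.constant(tensor=Tensor(np.zeros((2, 2), np.float32)))  # Tensor overload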
arr = np.ones(shape).astype(np.float32) ov_tensor = Tensor(arr, shape, Type.f32) - ov_const = ops.constant(ov_tensor, shared_memory=shared_flag) + ov_const = ops.constant(tensor=ov_tensor, shared_memory=shared_flag) assert isinstance(ov_const, Constant) assert np.all(list(ov_const.shape) == shape) diff --git a/src/bindings/python/tests/test_graph/test_create_op.py b/src/bindings/python/tests/test_graph/test_create_op.py index 940f8244f427b8..98d0ec3583882c 100644 --- a/src/bindings/python/tests/test_graph/test_create_op.py +++ b/src/bindings/python/tests/test_graph/test_create_op.py @@ -2492,8 +2492,8 @@ def test_stft(): window = ov.parameter([7], name="window", dtype=np.float32) frame_size = ov.constant(np.array(11, dtype=np.int32)) frame_step = ov.constant(np.array(3, dtype=np.int32)) - transpose_frames = True + transpose_frames = False op = ov_opset15.stft(data, window, frame_size, frame_step, transpose_frames) assert op.get_type_name() == "STFT" @@ -2501,6 +2501,42 @@ def test_stft(): assert op.get_output_element_type(0) == Type.f32 assert op.get_output_shape(0) == [4, 13, 6, 2] + transpose_frames = True + op = ov_opset15.stft(data, window, frame_size, frame_step, transpose_frames) + + assert op.get_type_name() == "STFT" + assert op.get_output_size() == 1 + assert op.get_output_element_type(0) == Type.f32 + assert op.get_output_shape(0) == [4, 6, 13, 2] + + +def test_search_sorted(): + sorted_sequence = ov.parameter([7, 256, 200, 200], name="sorted", dtype=np.float32) + values = ov.parameter([7, 256, 200, 10], name="values", dtype=np.float32) + op = ov_opset15.search_sorted(sorted_sequence=sorted_sequence, values=values, name="default") + assert op.get_type_name() == "SearchSorted" + assert op.get_output_size() == 1 + assert op.get_output_element_type(0) == Type.i64 + assert op.get_output_shape(0) == [7, 256, 200, 10] + assert op.get_attributes()["right_mode"] is False + assert op.get_friendly_name() == "default" + + op = ov_opset15.search_sorted(sorted_sequence, values, right_mode=True, name="right") + assert op.get_type_name() == "SearchSorted" + assert op.get_output_size() == 1 + assert op.get_output_element_type(0) == Type.i64 + assert op.get_output_shape(0) == [7, 256, 200, 10] + assert op.get_attributes()["right_mode"] is True + assert op.get_friendly_name() == "right" + + op = ov_opset15.search_sorted(sorted_sequence, values, False, name="left") + assert op.get_type_name() == "SearchSorted" + assert op.get_output_size() == 1 + assert op.get_output_element_type(0) == Type.i64 + assert op.get_output_shape(0) == [7, 256, 200, 10] + assert op.get_attributes()["right_mode"] is False + assert op.get_friendly_name() == "left" + def test_parameter_get_attributes(): parameter = ov.parameter([2, 2], dtype=np.float32, name="InputData") diff --git a/src/bindings/python/tests/test_graph/test_ops_fused.py b/src/bindings/python/tests/test_graph/test_ops_fused.py index bdbf4a1a9f1f9c..2bab743bfd7afb 100644 --- a/src/bindings/python/tests/test_graph/test_ops_fused.py +++ b/src/bindings/python/tests/test_graph/test_ops_fused.py @@ -110,17 +110,6 @@ def test_clamp_operator(): assert list(model.get_output_shape(0)) == [2, 2] -def test_squeeze_operator(): - data_shape = [1, 2, 1, 3, 1, 1] - parameter_data = ov.parameter(data_shape, name="Data", dtype=np.float32) - axes = [2, 4] - model = ov.squeeze(parameter_data, axes) - - assert model.get_type_name() == "Squeeze" - assert model.get_output_size() == 1 - assert list(model.get_output_shape(0)) == [1, 2, 3, 1] - - def 
test_squared_difference_operator(): x1_shape = [1, 2, 3, 4] x2_shape = [2, 3, 4] diff --git a/src/bindings/python/tests/test_graph/test_squeeze.py b/src/bindings/python/tests/test_graph/test_squeeze.py new file mode 100644 index 00000000000000..869d84a0414841 --- /dev/null +++ b/src/bindings/python/tests/test_graph/test_squeeze.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import openvino.runtime.opset1 as ov_opset1 +import openvino.runtime.opset15 as ov_opset15 +import numpy as np +import pytest + + +def test_squeeze_v1_operator(): + data_shape = [1, 2, 1, 3, 1, 1] + parameter_data = ov_opset1.parameter(data_shape, name="Data", dtype=np.float32) + axes = [2, 4] + model = ov_opset1.squeeze(parameter_data, axes) + + assert model.get_type_name() == "Squeeze" + assert model.get_output_size() == 1 + assert list(model.get_output_shape(0)) == [1, 2, 3, 1] + + +@pytest.mark.parametrize(("input_shape", "axes", "allow_axis_skip", "expected_shape"), [ + ((1, 2, 1, 3, 1, 1), [1, 2, 4], True, [1, 2, 3, 1]), + ((1, 2, 1, 3, 1, 1), [1, 2, 4], False, [1, 2, 3, 1]), + ((2, -1, 3), [1], False, [2, 3]) +]) +def test_squeeze_v15_operator(input_shape, axes, allow_axis_skip, expected_shape): + parameter_data = ov_opset15.parameter(input_shape, name="Data", dtype=np.float32) + model = ov_opset15.squeeze(parameter_data, axes, allow_axis_skip, name="Squeeze") + + assert model.get_type_name() == "Squeeze" + assert model.get_output_size() == 1 + assert list(model.get_output_shape(0)) == expected_shape + + +def test_squeeze_v15_dynamic_rank_output(): + parameter_data = ov_opset15.parameter((2, -1, 3), name="Data", dtype=np.float32) + model = ov_opset15.squeeze(parameter_data, [1], True, name="Squeeze") + + assert model.get_type_name() == "Squeeze" + assert model.get_output_size() == 1 + assert model.get_output_partial_shape(0).to_string() == "[...]" + + +def test_squeeze_v15_axes_not_given(): + parameter_data = ov_opset15.parameter((1, 3, 1, 1, 3, 5), name="Data", dtype=np.float32) + model = ov_opset15.squeeze(data=parameter_data, name="Squeeze") + + assert model.get_type_name() == "Squeeze" + assert model.get_output_size() == 1 + assert list(model.get_output_shape(0)) == [3, 3, 5] diff --git a/src/bindings/python/tests/test_runtime/test_properties.py b/src/bindings/python/tests/test_runtime/test_properties.py index 32eb48f6765f41..6065d72196b44b 100644 --- a/src/bindings/python/tests/test_runtime/test_properties.py +++ b/src/bindings/python/tests/test_runtime/test_properties.py @@ -335,6 +335,11 @@ def test_properties_ro(ov_property_ro, expected_value): ((64, 64),), ), (hints.kv_cache_precision, "KV_CACHE_PRECISION", ((Type.f32, Type.f32),)), + ( + hints.activations_scale_factor, + "ACTIVATIONS_SCALE_FACTOR", + ((0.0, 0.0),), + ), ( intel_cpu.denormals_optimization, "CPU_DENORMALS_OPTIMIZATION", diff --git a/src/bindings/python/wheel/CMakeLists.txt b/src/bindings/python/wheel/CMakeLists.txt index 7d8ebd5256a968..72828a155dd865 100644 --- a/src/bindings/python/wheel/CMakeLists.txt +++ b/src/bindings/python/wheel/CMakeLists.txt @@ -100,31 +100,14 @@ set(openvino_wheel_path "${openvino_wheels_output_dir}/${openvino_wheel_name}") # create target for openvino.wheel # -execute_process(COMMAND ${Python3_EXECUTABLE} -m pip --version - OUTPUT_VARIABLE pip_version OUTPUT_STRIP_TRAILING_WHITESPACE) - -string(REGEX MATCH "pip[ ]+([\\.0-9]*)" pip_version "${pip_version}") -set(pip_version ${CMAKE_MATCH_1}) - -if(pip_version 
VERSION_GREATER_EQUAL 22.0) - set(wheel_build_command - ${Python3_EXECUTABLE} -m pip wheel - --no-deps - --wheel-dir ${openvino_wheels_output_dir} - --verbose - --build-option --build-number=${WHEEL_BUILD} - --build-option --plat-name=${PLATFORM_TAG} - "${CMAKE_CURRENT_SOURCE_DIR}") -else() - set(wheel_build_command - ${Python3_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/setup.py" - --quiet - --no-user-cfg - bdist_wheel - --dist-dir ${openvino_wheels_output_dir} - --build-number=${WHEEL_BUILD} - --plat-name=${PLATFORM_TAG}) -endif() +# for --config-setting explanation see https://github.com/pypa/setuptools/issues/2491 +set(wheel_build_command + ${Python3_EXECUTABLE} -m build "${CMAKE_CURRENT_SOURCE_DIR}" + --outdir ${openvino_wheels_output_dir} + --config-setting=--build-option=--build-number=${WHEEL_BUILD} + --config-setting=--build-option=--plat-name=${PLATFORM_TAG} + --config-setting=--quiet + --wheel) add_custom_command(OUTPUT ${openvino_wheel_path} COMMAND ${setup_py_env} ${wheel_build_command} diff --git a/src/bindings/python/wheel/requirements-dev.txt b/src/bindings/python/wheel/requirements-dev.txt index c88d91751ebbc9..a6c2dcbdf8fee1 100644 --- a/src/bindings/python/wheel/requirements-dev.txt +++ b/src/bindings/python/wheel/requirements-dev.txt @@ -1,4 +1,5 @@ -c ../constraints.txt setuptools wheel +build patchelf; sys_platform == 'linux' and platform_machine == 'x86_64' diff --git a/src/common/snippets/tests/src/pass/mha_tokenization.cpp b/src/common/snippets/tests/src/pass/mha_tokenization.cpp index c5932ed690d670..040982feb4e0ec 100644 --- a/src/common/snippets/tests/src/pass/mha_tokenization.cpp +++ b/src/common/snippets/tests/src/pass/mha_tokenization.cpp @@ -169,6 +169,10 @@ TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA3D_SplitM) { } TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA3D_SplitM_withMul) { +#if defined(WIN32) && !defined(NDEBUG) + test_skipped = true; + GTEST_SKIP() << "Skipping on Windows in Debug mode due to Issue 155258."; +#endif const auto& f = MHASplitMFunction(std::vector{{128, 12, 64}, {128, 12, 64}, {12, 128, 128}, {128, 12, 64}}, std::vector({ov::element::f32, ov::element::f32, ov::element::f32, ov::element::f32}), std::vector{{2, 64, 12, 64}, {128, 12, 1, 64}, {12, 2, 64, 128}, {1, 128, 12, 64}, {128, 12, 64}}, @@ -191,6 +195,10 @@ TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA4D_SplitM) { } TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA4D_SplitM_withMul) { +#if defined(WIN32) && !defined(NDEBUG) + test_skipped = true; + GTEST_SKIP() << "Skipping on Windows in Debug mode due to Issue 155258."; +#endif const auto& f = MHASplitMFunction(std::vector{{1, 384, 16, 64}, {1, 384, 16, 64}, {1, 1, 1, 384}, {1, 384, 16, 64}}, std::vector({ov::element::f32, ov::element::f32, ov::element::f32, ov::element::f32}), std::vector{{1, 6, 64, 16, 64}, {1, 384, 16, 1, 64}, {1, 1, 1, 1, 384}, {1, 1, 384, 16, 64}, {1, 384, 16, 64}}, diff --git a/src/common/transformations/include/ov_ops/dynamic_quantize.hpp b/src/common/transformations/include/ov_ops/dynamic_quantize.hpp index 69c148305fb94f..2eb79322b84e28 100644 --- a/src/common/transformations/include/ov_ops/dynamic_quantize.hpp +++ b/src/common/transformations/include/ov_ops/dynamic_quantize.hpp @@ -14,29 +14,75 @@ namespace internal { /// \brief Operator performing Dynamic Quantize class TRANSFORMATIONS_API DynamicQuantize : public ov::op::Op { public: - OPENVINO_OP("DynamicQuantize", "gpu_opset"); + OPENVINO_OP("DynamicQuantize", "ie_internal_opset"); + + /** + * @brief Configuration for the type of 
quantization applied to the data: + * - Symmetric: Quantization where the zero point is fixed at zero, and the range is symmetric around zero. + * - Asymmetric: Quantization where the zero point is not fixed at zero. + */ + enum class QuantizationType { Symmetric, Asymmetric }; + + /** + * @brief Configuration for how Activations, Scales and Zero Points will be stored in output buffers: + * - Planar: Activations, Scales, and Zero Points are stored in independent buffers. + * - InterleavedScalesZP: Activations are stored in an independent buffer, while Scales and Zero Points (if any) are + * combined in a separate buffer. + */ + enum class OutputStorageType { Planar, InterleavedScalesZP, /* InterleavedActivationsScalesZP */ }; + + /// \brief Structure that specifies attributes for dynamic quantization + struct Attributes { + QuantizationType quantization_type = QuantizationType::Symmetric; + element::Type quantization_dt = element::undefined; + element::Type scale_dt = element::undefined; + element::Type zp_dt = element::undefined; + + std::vector group_sizes = {}; + std::vector scales_zp_output_order = {}; + OutputStorageType output_storage_type = OutputStorageType::Planar; + }; DynamicQuantize() = default; /// \brief Constructs an DynamicQuantize operation. /// /// \param data Input tensor with data - /// \param group_sizes Group sizes for dynamic quantization - /// \param dt_scale Data type for scale output - DynamicQuantize(const Output& data, std::vector group_sizes, element::Type dt_scale); + /// \param attrs Dynamic quantization configuration + DynamicQuantize(const Output& data, const Attributes& attrs); void validate_and_infer_types() override; std::shared_ptr clone_with_new_inputs(const ov::OutputVector& new_args) const override; + + const Attributes& get_attrs() const { + return m_attrs; + } + + void set_attrs(Attributes attrs) { + m_attrs = std::move(attrs); + } + const std::vector& get_group_sizes() const { - return m_group_sizes; - }; + return m_attrs.group_sizes; + } + + QuantizationType get_quantization_type() const { + return m_attrs.quantization_type; + } + + OutputStorageType get_output_storage_type() const { + return m_attrs.output_storage_type; + } + + const std::vector& get_scales_zp_output_order() const { + return m_attrs.scales_zp_output_order; + } + static std::vector shape_infer(const DynamicQuantize* op, - const std::vector& input_shapes, - const std::vector& group_sizes); + const std::vector& input_shapes); -private: - std::vector m_group_sizes; - element::Type m_dt_scale; +protected: + Attributes m_attrs; }; } // namespace internal diff --git a/src/common/transformations/include/ov_ops/lora_subgraph.hpp b/src/common/transformations/include/ov_ops/lora_subgraph.hpp new file mode 100644 index 00000000000000..75aaa16a5d280e --- /dev/null +++ b/src/common/transformations/include/ov_ops/lora_subgraph.hpp @@ -0,0 +1,38 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/op/op.hpp" +#include "openvino/op/util/sub_graph_base.hpp" +#include "transformations_visibility.hpp" + +namespace ov { +namespace op { +namespace internal { +/** + * @interface LoraSubgraph + * @brief LoraSubgraph operation, which is used for LoRA subgraph fusion. + * It always has only 1 output, and the following inputs, whose order is fixed: + * 1. main_flow_input: input from original model. + * 2. LoRA_input: input to which the Low-Rank adaptation is applied. + * The adapted input is combined with `main_flow_input`. + * 3.
LoRA_matrices: 3 Low-Rank adaptation matrices applied to `LoRA_input`. + * The fused subgraph can be optimized in runtime based on LoRA semantic. + * For instance, `main_flow_input` can be fast-forwarded to output in case of empty `LoRA_matrices`. + */ +class TRANSFORMATIONS_API LoraSubgraph : public ov::op::util::SubGraphOp { +public: + OPENVINO_OP("LoraSubgraph", "ie_internal_opset", ov::op::util::SubGraphOp); + + LoraSubgraph() = default; + LoraSubgraph(const OutputVector& args, const std::shared_ptr& body); + + void validate_and_infer_types() override; + std::shared_ptr clone_with_new_inputs(const ov::OutputVector& new_args) const override; +}; + +} // namespace internal +} // namespace op +} // namespace ov diff --git a/src/common/transformations/include/transformations/common_optimizations/lora_subgraph_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/lora_subgraph_fusion.hpp new file mode 100644 index 00000000000000..8422ad95f262c6 --- /dev/null +++ b/src/common/transformations/include/transformations/common_optimizations/lora_subgraph_fusion.hpp @@ -0,0 +1,25 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "openvino/pass/matcher_pass.hpp" +#include "transformations_visibility.hpp" + +namespace ov { +namespace pass { + +class TRANSFORMATIONS_API LoraSubgraphFusion; + +} // namespace pass +} // namespace ov + +class ov::pass::LoraSubgraphFusion : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("LoraSubgraphFusion", "0"); + LoraSubgraphFusion(); +}; diff --git a/src/common/transformations/include/transformations/op_conversions/convert_slicescatter.hpp b/src/common/transformations/include/transformations/op_conversions/convert_slicescatter.hpp new file mode 100644 index 00000000000000..020b4e236fcac5 --- /dev/null +++ b/src/common/transformations/include/transformations/op_conversions/convert_slicescatter.hpp @@ -0,0 +1,22 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/matcher_pass.hpp" +#include "transformations_visibility.hpp" + +namespace ov { +namespace pass { + +class TRANSFORMATIONS_API ConvertSliceScatter; + +} // namespace pass +} // namespace ov + +class ov::pass::ConvertSliceScatter : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("ConvertSliceScatter", "0"); + ConvertSliceScatter(); +}; diff --git a/src/common/transformations/include/transformations/op_conversions/convert_squeeze15_downgrade.hpp b/src/common/transformations/include/transformations/op_conversions/convert_squeeze15_downgrade.hpp new file mode 100644 index 00000000000000..c2ebfbc0f3138b --- /dev/null +++ b/src/common/transformations/include/transformations/op_conversions/convert_squeeze15_downgrade.hpp @@ -0,0 +1,23 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/matcher_pass.hpp" +#include "transformations_visibility.hpp" + +namespace ov { +namespace pass { +/** + * @ingroup ov_transformation_common_api + * @brief Converts Squeeze v15 to Squeeze v0. 
+ */ +class TRANSFORMATIONS_API ConvertSqueeze15ToSqueeze0 : public MatcherPass { +public: + OPENVINO_RTTI("ConvertSqueeze15ToSqueeze0", "0"); + ConvertSqueeze15ToSqueeze0(); +}; + +} // namespace pass +} // namespace ov diff --git a/src/plugins/intel_cpu/src/utils/print_model.hpp b/src/common/transformations/include/transformations/utils/print_model.hpp similarity index 100% rename from src/plugins/intel_cpu/src/utils/print_model.hpp rename to src/common/transformations/include/transformations/utils/print_model.hpp diff --git a/src/common/transformations/src/ov_ops/dynamic_quantize.cpp b/src/common/transformations/src/ov_ops/dynamic_quantize.cpp index 74c0498e9a4425..9d1dfa5e5e3f62 100644 --- a/src/common/transformations/src/ov_ops/dynamic_quantize.cpp +++ b/src/common/transformations/src/ov_ops/dynamic_quantize.cpp @@ -7,62 +7,113 @@ #include "openvino/core/partial_shape.hpp" #include "openvino/core/validation_util.hpp" #include "openvino/op/variadic_split.hpp" -#include "variadic_split_shape_inference.hpp" +#include "openvino/util/common_util.hpp" namespace ov { namespace op { namespace internal { -DynamicQuantize::DynamicQuantize(const Output& data, std::vector group_sizes, element::Type dt_scale) - : Op({data}), - m_group_sizes(std::move(group_sizes)), - m_dt_scale(dt_scale) { - OPENVINO_ASSERT(data.get_partial_shape().rank() == m_group_sizes.size(), - "FC input rank should be same as the rank of group_size ", +DynamicQuantize::DynamicQuantize(const Output& data, const Attributes& attrs) : Op({data}), m_attrs(attrs) { + if (m_attrs.scales_zp_output_order.empty()) { + m_attrs.scales_zp_output_order.resize(data.get_partial_shape().size()); + std::iota(m_attrs.scales_zp_output_order.begin(), m_attrs.scales_zp_output_order.end(), 0); + } + + OPENVINO_ASSERT(data.get_partial_shape().rank() == m_attrs.group_sizes.size(), + "DQ input rank should be same as the rank of group_size ", data.get_tensor_ptr()->get_partial_shape().rank(), " / ", - m_group_sizes.size()); - set_output_size(2); + m_attrs.group_sizes.size()); + + OPENVINO_ASSERT(data.get_partial_shape().size() == m_attrs.scales_zp_output_order.size(), + "DQ input rank should be same as the rank of scales and zero points output order)"); + + size_t outputs_number = 2; + if (m_attrs.quantization_type == QuantizationType::Asymmetric && + m_attrs.output_storage_type == OutputStorageType::Planar) + outputs_number = 3; + + OPENVINO_ASSERT( + (m_attrs.output_storage_type == OutputStorageType::Planar) || + (m_attrs.quantization_type == QuantizationType::Asymmetric && m_attrs.scale_dt == m_attrs.zp_dt), + "Scales and Zero Points should have the same data type to be stored in the single buffer"); + + set_output_size(outputs_number); validate_and_infer_types(); } void DynamicQuantize::validate_and_infer_types() { std::vector input_shapes = {get_input_partial_shape(0)}; - auto out_shapes = shape_infer(this, input_shapes, m_group_sizes); - set_output_type(0, element::i8, out_shapes[0]); - set_output_type(1, m_dt_scale, out_shapes[1]); + auto out_shapes = shape_infer(this, input_shapes); + set_output_type(0, m_attrs.quantization_dt, out_shapes[0]); + set_output_type(1, m_attrs.scale_dt, out_shapes[1]); + + if (m_attrs.quantization_type == QuantizationType::Asymmetric && + m_attrs.output_storage_type == OutputStorageType::Planar) + set_output_type(2, m_attrs.zp_dt, out_shapes[2]); } std::shared_ptr DynamicQuantize::clone_with_new_inputs(const ov::OutputVector& new_args) const { check_new_args_count(this, new_args); - return 
std::make_shared(new_args.at(0), m_group_sizes, m_dt_scale); + return std::make_shared(new_args.at(0), m_attrs); } std::vector DynamicQuantize::shape_infer(const DynamicQuantize* op, - const std::vector& input_shapes, - const std::vector& group_sizes) { + const std::vector& input_shapes) { std::vector out_shapes; out_shapes.push_back(input_shapes[0]); auto scale_shape = input_shapes[0]; + const auto& group_sizes = op->m_attrs.group_sizes; OPENVINO_ASSERT(scale_shape.size() == group_sizes.size(), "Scale_shape and group_size are supposed to have same rank: ", scale_shape.size(), " / ", group_sizes.size()); for (size_t i = 0; i < scale_shape.size(); i++) { - if (scale_shape[i].is_dynamic()) + if (scale_shape[i].is_dynamic() || scale_shape[i] == 0) continue; - if (group_sizes[i] == UINT64_MAX) + if (group_sizes[i] == UINT64_MAX) { scale_shape[i] = 1; - else { - scale_shape[i] /= group_sizes[i]; // if group_size is larger than shape, scale_shape will be 1 - scale_shape[i] = std::max(static_cast(scale_shape[i].get_length()), 1); + } else { + scale_shape[i] = ov::util::ceil_div(scale_shape[i].get_length(), static_cast(group_sizes[i])); } } out_shapes.push_back(scale_shape); + + // Add zero points shape, same as the scales + if (op->m_attrs.quantization_type == QuantizationType::Asymmetric && + op->m_attrs.output_storage_type == OutputStorageType::Planar) + out_shapes.push_back(scale_shape); + + auto transpose_shape = [](const ov::PartialShape& shape, const std::vector& scales_zp_output_order) { + auto transposed_shape = shape; + for (size_t i = 0; i < scales_zp_output_order.size(); i++) { + OPENVINO_ASSERT(scales_zp_output_order[i] < transposed_shape.size()); + transposed_shape[i] = shape[scales_zp_output_order[i]]; + } + + return transposed_shape; + }; + + // Transpose scales and zero points shapes + const auto& scales_zp_output_order = op->m_attrs.scales_zp_output_order; + for (size_t i = 1; i < out_shapes.size(); i++) { + out_shapes[i] = transpose_shape(out_shapes[i], scales_zp_output_order); + } + + if (op->m_attrs.quantization_type == QuantizationType::Asymmetric && + op->m_attrs.output_storage_type != OutputStorageType::Planar) { + // Currently scales and zero points are supposed to be combined over the last dimension only + const auto combine_axis = scales_zp_output_order.empty() ? 
out_shapes[1].size() - 1 + : scales_zp_output_order[out_shapes[1].size() - 1]; + OPENVINO_ASSERT(group_sizes[combine_axis] != 1); + + out_shapes[1][combine_axis] *= 2; // [scale, zero_point] pairs + } + return out_shapes; } diff --git a/src/common/transformations/src/ov_ops/lora_subgraph.cpp b/src/common/transformations/src/ov_ops/lora_subgraph.cpp new file mode 100644 index 00000000000000..8a7a5a75c69c7e --- /dev/null +++ b/src/common/transformations/src/ov_ops/lora_subgraph.cpp @@ -0,0 +1,41 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ov_ops/lora_subgraph.hpp" + +#include "itt.hpp" + +namespace ov { +namespace op { +namespace internal { + +LoraSubgraph::LoraSubgraph(const OutputVector& args, const std::shared_ptr& body) : SubGraphOp(args) { + SubGraphOp::set_function(body); + for (size_t i = 0; i < body->get_parameters().size(); ++i) + m_input_descriptions[0].push_back(std::make_shared(i, i)); + for (size_t i = 0; i < body->get_output_size(); ++i) + m_output_descriptions[0].push_back(std::make_shared(i, i)); + constructor_validate_and_infer_types(); +} + +std::shared_ptr LoraSubgraph::clone_with_new_inputs(const OutputVector& new_args) const { + INTERNAL_OP_SCOPE(internal_LoraSubgraph_clone_with_new_inputs); + check_new_args_count(this, new_args); + return std::make_shared(new_args, get_function()->clone()); +} + +void LoraSubgraph::validate_and_infer_types() { + INTERNAL_OP_SCOPE(internal_LoraSubgraph_validate_and_infer_types); + OPENVINO_ASSERT(get_input_size() == 5, "LoraSubgraph must have 5 inputs whereas it has ", get_input_size()); + OPENVINO_ASSERT(get_output_size() == 1, "LoraSubgraph must have 1 output whereas it has ", get_output_size()); + const auto& body = get_function(); + OPENVINO_ASSERT(body, "LoraSubgraph must have initialized body"); + validate_and_infer_type_body(body, m_input_descriptions[0]); + for (size_t i = 0; i < get_output_size(); ++i) + set_output_type(i, body->get_output_element_type(i), body->get_output_partial_shape(i)); +} + +} // namespace internal +} // namespace op +} // namespace ov diff --git a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp index 500d003bd4642e..37ee2d12d9aebb 100644 --- a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp @@ -94,9 +94,11 @@ #include "transformations/op_conversions/convert_scatter_elements_update12_downgrade.hpp" #include "transformations/op_conversions/convert_scatter_nd_update15_downgrade.hpp" #include "transformations/op_conversions/convert_slice_to_strided_slice.hpp" +#include "transformations/op_conversions/convert_slicescatter.hpp" #include "transformations/op_conversions/convert_softmax_downgrade.hpp" #include "transformations/op_conversions/convert_softmax_upgrade.hpp" #include "transformations/op_conversions/convert_space_to_depth.hpp" +#include "transformations/op_conversions/convert_squeeze15_downgrade.hpp" #include "transformations/op_conversions/convert_subtract.hpp" #include "transformations/op_conversions/convert_topk11_downgrade.hpp" #include "transformations/op_conversions/convert_xor_to_logical_xor.hpp" @@ -233,6 +235,8 @@ bool ov::pass::CommonOptimizations::run_on_model(const std::shared_ptr(); ADD_MATCHER(fq_fusions, FakeQuantizeMulFusion) diff --git 
a/src/common/transformations/src/transformations/common_optimizations/gelu_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/gelu_fusion.cpp index 221484c75cccde..8d075f4a727758 100644 --- a/src/common/transformations/src/transformations/common_optimizations/gelu_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/gelu_fusion.cpp @@ -22,6 +22,7 @@ #include "openvino/op/parameter.hpp" #include "openvino/op/power.hpp" #include "openvino/op/tanh.hpp" +#include "openvino/pass/pattern/op/or.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" #include "transformations/utils/utils.hpp" @@ -280,9 +281,16 @@ ov::pass::GeluFusionWithTanh::GeluFusionWithTanh() { auto add_1 = ov::pass::pattern::wrap_type({tanh, add_1_constant}); auto mul_2_constant = ov::pass::pattern::wrap_type(); - auto mul_2 = ov::pass::pattern::wrap_type({add_1, mul_2_constant}); - auto mul_3 = ov::pass::pattern::wrap_type({input, mul_2}); + // x * (0.5 * (1 + tanh)) + auto mul_2_1 = ov::pass::pattern::wrap_type({add_1, mul_2_constant}); + auto mul_3_1 = ov::pass::pattern::wrap_type({input, mul_2_1}); + + // (x * 0.5) * (1 + tanh) + auto mul_2_2 = ov::pass::pattern::wrap_type({input, mul_2_constant}); + auto mul_3_2 = ov::pass::pattern::wrap_type({add_1, mul_2_2}); + + auto mul_3 = std::make_shared(OutputVector{mul_3_1, mul_3_2}); ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) { auto& pattern_to_output = m.get_pattern_value_map(); @@ -298,7 +306,6 @@ ov::pass::GeluFusionWithTanh::GeluFusionWithTanh() { ov::as_type_ptr(pattern_to_output.at(mul_2_constant).get_node_shared_ptr()); auto add_1_constant_value = ov::as_type_ptr(pattern_to_output.at(add_1_constant).get_node_shared_ptr()); - if (!pow_constant_value || !add_1_constant_value || !mul_0_constant_value || !mul_1_constant_value || !mul_2_constant_value) { return false; @@ -318,18 +325,17 @@ ov::pass::GeluFusionWithTanh::GeluFusionWithTanh() { auto gelu = std::make_shared(x_output, op::GeluApproximationMode::TANH); gelu->set_friendly_name(m.get_match_root()->get_friendly_name()); - ov::copy_runtime_info( - { - pattern_to_output.at(pow).get_node_shared_ptr(), - pattern_to_output.at(mul_0).get_node_shared_ptr(), - pattern_to_output.at(mul_1).get_node_shared_ptr(), - pattern_to_output.at(mul_2).get_node_shared_ptr(), - pattern_to_output.at(mul_3).get_node_shared_ptr(), - pattern_to_output.at(tanh).get_node_shared_ptr(), - pattern_to_output.at(add_0).get_node_shared_ptr(), - pattern_to_output.at(add_1).get_node_shared_ptr(), - }, - gelu); + + std::vector> pattern_nodes = + {pow, mul_0, mul_1, tanh, add_0, add_1, mul_2_1, mul_2_2, mul_3_1, mul_3_2}; + std::vector> cp_rt_info_nodes; + for (const auto& pattern_node : pattern_nodes) { + if (pattern_to_output.count(pattern_node)) { + cp_rt_info_nodes.push_back(pattern_to_output.at(pattern_node).get_node_shared_ptr()); + } + } + ov::copy_runtime_info(cp_rt_info_nodes, gelu); + ov::replace_node(m.get_match_root(), gelu); return true; }; diff --git a/src/common/transformations/src/transformations/common_optimizations/lora_subgraph_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/lora_subgraph_fusion.cpp new file mode 100644 index 00000000000000..366ce00894242e --- /dev/null +++ b/src/common/transformations/src/transformations/common_optimizations/lora_subgraph_fusion.cpp @@ -0,0 +1,108 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include 
"transformations/common_optimizations/lora_subgraph_fusion.hpp" + +#include +#include + +#include "itt.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/convolution.hpp" +#include "openvino/op/matmul.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/parameter.hpp" +#include "openvino/op/transpose.hpp" +#include "openvino/op/util/read_value_base.hpp" +#include "openvino/pass/pattern/op/optional.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "ov_ops/lora_subgraph.hpp" +#include "transformations/utils/utils.hpp" + +ov::pass::LoraSubgraphFusion::LoraSubgraphFusion() { + MATCHER_SCOPE(LoraSubgraphFusion); + using namespace pass::pattern; + auto lora_input_m = any_input(); + auto transpose_const1_m = wrap_type(consumers_count(1)); + auto transpose1_m = optional({lora_input_m, transpose_const1_m}, consumers_count(1)); + auto read_value1_m = wrap_type(); + auto matmul1_m = wrap_type({transpose1_m, read_value1_m}, consumers_count(1)); + auto read_value2_m = wrap_type(); + auto multiply_m = wrap_type({matmul1_m, read_value2_m}, consumers_count(1)); + auto read_value3_m = wrap_type(); + auto matmul2_m = wrap_type({multiply_m, read_value3_m}, consumers_count(1)); + auto transpose_const2_m = wrap_type(consumers_count(1)); + auto transpose2_m = optional({matmul2_m, transpose_const2_m}, consumers_count(1)); + auto main_flow_m = wrap_type({lora_input_m, any_input()}); + auto add_m = wrap_type({transpose2_m, main_flow_m}); + + ov::matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](Matcher& m) { + const auto& pattern_map = m.get_pattern_value_map(); + const auto& lora_input = pattern_map.at(lora_input_m); + const auto& matmul1 = pattern_map.at(matmul1_m); + const auto& read_value1 = pattern_map.at(read_value1_m); + const auto& multiply = pattern_map.at(multiply_m); + const auto& read_value2 = pattern_map.at(read_value2_m); + const auto& matmul2 = pattern_map.at(matmul2_m); + const auto& read_value3 = pattern_map.at(read_value3_m); + const auto& main_flow = pattern_map.at(main_flow_m); + const auto& add = pattern_map.at(add_m); + + const auto add_node = add.get_node_shared_ptr(); + if (transformation_callback(add_node)) { + return false; + } + + auto find_connected_input = [](ov::Node* child, ov::Node* parent) { + for (size_t i = 0; i < child->get_input_size(); ++i) { + auto input = child->input(i); + if (input.get_source_output().get_node() == parent) + return input; + } + OPENVINO_THROW("Ops are not connected"); + }; + + // Note: internal_inputs/external_connections order corresponds to LoraSubgraph semantic + const std::vector> internal_inputs{ + // For commutative eltwise ops, input idx may be any, so it must be computed + find_connected_input(add.get_node(), main_flow.get_node()), + pattern_map.count(transpose1_m) ? 
pattern_map.at(transpose1_m).get_node()->input(0) + : matmul1.get_node()->input(0), + matmul1.get_node()->input(1), + find_connected_input(multiply.get_node(), read_value2.get_node()), + matmul2.get_node()->input(1), + }; + const ov::OutputVector external_connections{ + main_flow, + lora_input, + read_value1, + read_value2, + read_value3, + }; + + ov::ParameterVector subgraph_parameters; + subgraph_parameters.reserve(internal_inputs.size()); + for (auto& in : internal_inputs) { + auto new_parameter = std::make_shared(in.get_element_type(), in.get_partial_shape()); + subgraph_parameters.push_back(new_parameter); + in.replace_source_output(new_parameter); + } + // Note: lora consumers should be taken before lora_subgraph creation, + // because only original consumers should be replaced with lora's output + const auto& lora_consumers = add.get_target_inputs(); + const auto lora_subgraph = std::make_shared(ov::OutputVector{add}, subgraph_parameters); + const auto lora_node = std::make_shared(external_connections, lora_subgraph); + ov::copy_runtime_info(m.get_matched_nodes(), lora_node); + lora_node->set_friendly_name(add_node->get_friendly_name()); + + for (const auto& consumer : lora_consumers) + consumer.replace_source_output(lora_node->output(0)); + if (!add.get_names().empty()) + lora_node->output(0).set_names(add.get_names()); + return true; + }; + + auto m = std::make_shared(add_m, matcher_name); + this->register_matcher(m, callback); +} diff --git a/src/common/transformations/src/transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.cpp b/src/common/transformations/src/transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.cpp index f0de55f4028c94..d86b4b71f102c7 100644 --- a/src/common/transformations/src/transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.cpp @@ -178,19 +178,21 @@ bool ov::pass::RemoveMultiSubGraphOpDanglingParamsResults::run_on_model(const st if (std::count(std::begin(to_remove_descriptors_indexes[body_idx]), std::end(to_remove_descriptors_indexes[body_idx]), desc_idx) > 0) { - auto& body_param = body_params[body_in_descriptors[desc_idx]->m_body_parameter_index]; - body_func->remove_parameter(body_param); - // Move all body indexes which are after these indicated by to_remove_descriptors_indexes - update_body_param_desc(body_in_descriptors, - body_in_descriptors[desc_idx]->m_body_parameter_index); + if (body_in_descriptors[desc_idx]->m_body_parameter_index < body_params.size()) { + auto& body_param = body_params[body_in_descriptors[desc_idx]->m_body_parameter_index]; + body_func->remove_parameter(body_param); + // Move all body indexes which are after these indicated by to_remove_descriptors_indexes + update_body_param_desc(body_in_descriptors, + body_in_descriptors[desc_idx]->m_body_parameter_index); + } // remove dangling input of MultiSubGraphOp which was not removed earlier auto current_input_idx = body_in_descriptors[desc_idx]->m_input_index; - auto& current_input = op_inputs[current_input_idx]; // the same input tensor can go to different input ports - if (std::count(std::begin(required_inputs_indices), + if (current_input_idx < op_inputs.size() && + std::count(std::begin(required_inputs_indices), std::end(required_inputs_indices), current_input_idx) == 0 && - std::count(std::begin(op_inputs), std::end(op_inputs), current_input) > 0) { + std::count(std::begin(op_inputs), 
std::end(op_inputs), op_inputs[current_input_idx]) > 0) { op_inputs.erase(std::next(op_inputs.begin(), current_input_idx)); // Move all input indexes (in all bodies) which are after these indicated by // to_remove_descriptors_indexes and are not used in any body diff --git a/src/common/transformations/src/transformations/common_optimizations/reverse_shape_and_type_infer.cpp b/src/common/transformations/src/transformations/common_optimizations/reverse_shape_and_type_infer.cpp index 211f351da34024..9a06201f688675 100644 --- a/src/common/transformations/src/transformations/common_optimizations/reverse_shape_and_type_infer.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/reverse_shape_and_type_infer.cpp @@ -282,6 +282,15 @@ bool ov::pass::ReverseShapeAndTypeInfer::run_on_model(const std::shared_ptrget_input_tensor(0).m_element_type = element::boolean; is_changed = true; } + + // in case TensorFlow models, we can deduce predicate shape that must be a scalar + // If operations created by fusing Switch-Merge sub-graph contain tf_switch_merge_if rt-info + if (if_op->get_rt_info().count("tf_switch_merge_if") && + if_op->get_rt_info()["tf_switch_merge_if"].as() && + if_op->input_value(0).get_partial_shape().rank().is_dynamic()) { + if_op->get_input_tensor(0).m_partial_shape = ov::PartialShape({}); + is_changed = true; + } } else if (ov::as_type_ptr(op)) { is_changed |= inherit_output_shape(op, {0}); is_changed |= inherit_output_type(op, {1}); diff --git a/src/common/transformations/src/transformations/common_optimizations/shared_ops_optimization.cpp b/src/common/transformations/src/transformations/common_optimizations/shared_ops_optimization.cpp index 226093143e68d8..188d0b07684098 100644 --- a/src/common/transformations/src/transformations/common_optimizations/shared_ops_optimization.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/shared_ops_optimization.cpp @@ -103,8 +103,11 @@ bool nodes_are_equal(const std::shared_ptr& lhs, const std::shared_ptr& model) { bool rewritten = false; - - for (const auto& op : model->get_ordered_ops()) { + std::unordered_map, size_t> index_map; + const auto& order = model->get_ordered_ops(); + for (size_t i = 0; i < order.size(); ++i) + index_map[order[i]] = i; + for (const auto& op : order) { // Recursively apply transformation for sub-graph based operations if (auto multi_subgraph_op = dynamic_pointer_cast(op)) { for (const auto& sub_graph : multi_subgraph_op->get_functions()) { @@ -124,6 +127,13 @@ bool shared_node_optimization(const shared_ptr& model) { auto& shared_nodes = item.second; if (shared_nodes.size() < 2) continue; + // sort shared_nodes so that root would be the earliest in the topological order + // it is critical for continuous application of this optimization + std::sort(shared_nodes.begin(), + shared_nodes.end(), + [&index_map](const std::shared_ptr& a, const std::shared_ptr& b) { + return index_map[a] < index_map[b]; + }); std::vector visited_nodes(shared_nodes.size(), false); for (size_t i = 0; i < visited_nodes.size(); ++i) { diff --git a/src/common/transformations/src/transformations/convert_precision.cpp b/src/common/transformations/src/transformations/convert_precision.cpp index 6f5166dfd26760..3ab2c694be40ef 100644 --- a/src/common/transformations/src/transformations/convert_precision.cpp +++ b/src/common/transformations/src/transformations/convert_precision.cpp @@ -62,6 +62,8 @@ bool fuse_type_to_ctc_greedy_decoder_seq_len(const std::shared_ptr& no bool 
fuse_type_to_random_uniform_v8(const std::shared_ptr& node, const precisions_map& precisions); +bool fuse_type_to_search_sorted_v15(const std::shared_ptr& node, const precisions_map& precisions); + bool extend_select_type(const std::shared_ptr& node, const precisions_map& precisions); bool extend_reverse_type(const std::shared_ptr& node, const precisions_map& precisions); @@ -254,7 +256,6 @@ bool convert_function_precision(const std::shared_ptr& f, // Register internal constants only after fixing input type that could lead to nodes // replacement register_constants(ops); - for (auto& node : ops) { // skip precision sensitive nodes if (skip_precision_sensitive && fp16_compression_is_disabled(node) && has_fp16_compression) @@ -469,7 +470,8 @@ bool ov::pass::ConvertPrecision::run_on_model(const std::shared_ptr& {ov::op::v13::Multinomial::get_type_info_static(), fuse_type_to_multinomial_v13}, {ov::op::v0::PriorBox::get_type_info_static(), fuse_type_to_prior_box}, {ov::op::v8::PriorBox::get_type_info_static(), fuse_type_to_prior_box}, - {ov::op::v0::PriorBoxClustered::get_type_info_static(), fuse_type_to_prior_box}}; + {ov::op::v0::PriorBoxClustered::get_type_info_static(), fuse_type_to_prior_box}, + {ov::op::v15::SearchSorted::get_type_info_static(), fuse_type_to_search_sorted_v15}}; for (const auto& it : m_additional_type_to_fuse_map) { type_to_fuse[it.first] = it.second; @@ -554,6 +556,18 @@ bool fuse_type_to_unique_v10(const std::shared_ptr& node, const precisions return res; } +bool fuse_type_to_search_sorted_v15(const std::shared_ptr& node, const precisions_map& precisions) { + bool res = false; + if (auto op = ov::as_type_ptr(node)) { + auto it = precisions.find(node->get_output_element_type(0)); + if (it != precisions.end()) { + op->set_output_type_attr(it->second); + res = true; + } + } + return res; +} + bool fuse_type_to_range_v4(const std::shared_ptr& node, const precisions_map& precisions) { auto it = precisions.find(node->get_output_element_type(0)); if (it == precisions.end()) @@ -1000,12 +1014,14 @@ bool extend_select_type(const std::shared_ptr& node, const precisions_ type_relaxed->set_origin_input_type(ov::element::boolean, 0); return true; } else if (auto casted = ov::as_type_ptr(node)) { - auto relaxed_op = - std::make_shared>(*casted, - ov::element::TypeVector{ov::element::boolean}, - ov::element::TypeVector{}); - replace_node(node, relaxed_op); - return true; + if (precisions.count(ov::element::boolean) != 0) { + auto relaxed_op = + std::make_shared>(*casted, + ov::element::TypeVector{ov::element::boolean}, + ov::element::TypeVector{}); + replace_node(node, relaxed_op); + return true; + } } return false; } diff --git a/src/common/transformations/src/transformations/op_conversions/convert_slicescatter.cpp b/src/common/transformations/src/transformations/op_conversions/convert_slicescatter.cpp new file mode 100644 index 00000000000000..eedde963461a6b --- /dev/null +++ b/src/common/transformations/src/transformations/op_conversions/convert_slicescatter.cpp @@ -0,0 +1,77 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/op_conversions/convert_slicescatter.hpp" + +#include +#include + +#include "itt.hpp" +#include "openvino/core/rt_info.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/range.hpp" +#include "openvino/op/reduce_prod.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/op/scatter_nd_update.hpp" +#include "openvino/op/shape_of.hpp" +#include "openvino/op/slice.hpp" 
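+// The decomposition below rewrites SliceScatter as a ScatterNDUpdate over flattened tensors.
+// Worked example (illustrative) for 1-D data [10, 20, 30, 40, 50], updates [7, 8], start=1, stop=3, step=1:
+// the index tensor [0, 1, 2, 3, 4] is built with Range, sliced to [1, 2], and the updates are scattered
+// into those flat positions, giving [10, 7, 8, 40, 50], which is then reshaped back to the data shape.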
+#include "openvino/op/slice_scatter.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" + +ov::pass::ConvertSliceScatter::ConvertSliceScatter() { + MATCHER_SCOPE(ConvertSliceScatter); + + const auto& slicescatter = pattern::wrap_type(); + + const matcher_pass_callback callback = [this](pattern::Matcher& m) { + const auto& slice_node = ov::as_type_ptr(m.get_match_root()); + if (!slice_node || transformation_callback(slice_node)) { + return false; + } + NodeRegistry node_registry; + const auto& const_0 = node_registry.make(ov::element::i64, Shape{}, 0); + const auto& const_1 = node_registry.make(ov::element::i64, Shape{}, 1); + const auto& const_1d_neg_1 = + node_registry.make(ov::element::i64, Shape{1}, std::vector{-1}); + const auto& const_scatter_indices_shape = + node_registry.make(ov::element::i64, Shape{2}, std::vector{-1, 1}); + const auto& data_shape = node_registry.make(slice_node->input_value(0), ov::element::i64); + const auto& num_elements_data = node_registry.make(data_shape, const_0, false); + const auto& data_indices_flatten = + node_registry.make(const_0, num_elements_data, const_1, ov::element::i64); + const auto& full_data_indices = + node_registry.make(data_indices_flatten, data_shape, false); + std::shared_ptr slice_indices; + if (slice_node->get_input_size() == 5) { + slice_indices = node_registry.make(full_data_indices, + slice_node->input_value(2), + slice_node->input_value(3), + slice_node->input_value(4)); + } else { + slice_indices = node_registry.make(full_data_indices, + slice_node->input_value(2), + slice_node->input_value(3), + slice_node->input_value(4), + slice_node->input_value(5)); + } + const auto& slice_indices_flatten = + node_registry.make(slice_indices, const_scatter_indices_shape, false); + const auto& updates_flatten = + node_registry.make(slice_node->input_value(1), const_1d_neg_1, false); + const auto& data_flatten = + node_registry.make(slice_node->input_value(0), const_1d_neg_1, false); + const auto& output_flatten = + node_registry.make(data_flatten, slice_indices_flatten, updates_flatten); + const auto& output = node_registry.make(output_flatten, data_shape, false); + + output->set_friendly_name(slice_node->get_friendly_name()); + copy_runtime_info(slice_node, node_registry.get()); + replace_node(slice_node, output); + + return true; + }; + + const auto& m = std::make_shared(slicescatter, matcher_name); + this->register_matcher(m, callback); +} diff --git a/src/common/transformations/src/transformations/op_conversions/convert_squeeze15_downgrade.cpp b/src/common/transformations/src/transformations/op_conversions/convert_squeeze15_downgrade.cpp new file mode 100644 index 00000000000000..50701d3d6acd56 --- /dev/null +++ b/src/common/transformations/src/transformations/op_conversions/convert_squeeze15_downgrade.cpp @@ -0,0 +1,40 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/op_conversions/convert_squeeze15_downgrade.hpp" + +#include "itt.hpp" +#include "openvino/core/rt_info.hpp" +#include "openvino/op/squeeze.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "transformations/utils/utils.hpp" + +ov::pass::ConvertSqueeze15ToSqueeze0::ConvertSqueeze15ToSqueeze0() { + MATCHER_SCOPE(ConvertSqueeze15ToSqueeze0); + + const auto& squeeze_v15_pattern = pattern::wrap_type(); + + const matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](pattern::Matcher& m) { + const auto& squeeze_v15 = ov::as_type_ptr(m.get_match_root()); + if (!squeeze_v15 || 
transformation_callback(squeeze_v15)) { + return false; + } + std::shared_ptr squeeze_v0; + if (squeeze_v15->get_input_size() == 1) { + squeeze_v0 = std::make_shared(squeeze_v15->input_value(0)); + } else if (squeeze_v15->get_input_size() == 2 && !squeeze_v15->get_allow_axis_skip()) { + squeeze_v0 = std::make_shared(squeeze_v15->input_value(0), squeeze_v15->input_value(1)); + } else { + return false; + } + squeeze_v0->set_friendly_name(squeeze_v15->get_friendly_name()); + copy_runtime_info(squeeze_v15, squeeze_v0); + replace_node(squeeze_v15, squeeze_v0); + + return true; + }; + + auto m = std::make_shared(squeeze_v15_pattern, matcher_name); + register_matcher(m, callback); +} diff --git a/src/common/transformations/src/transformations/symbolic_transformations/symbol_optimization.cpp b/src/common/transformations/src/transformations/symbolic_transformations/symbol_optimization.cpp index 27790904f4360b..55f0794e0ee008 100644 --- a/src/common/transformations/src/transformations/symbolic_transformations/symbol_optimization.cpp +++ b/src/common/transformations/src/transformations/symbolic_transformations/symbol_optimization.cpp @@ -7,6 +7,7 @@ #include "itt.hpp" #include "openvino/core/bound_evaluation_util.hpp" #include "openvino/core/rt_info.hpp" +#include "openvino/core/tensor_util.hpp" #include "openvino/core/validation_util.hpp" #include "openvino/op/add.hpp" #include "openvino/op/concat.hpp" @@ -354,12 +355,85 @@ void save_shape_sources(const std::shared_ptr& op, STS_map& symbol_sha } } } + +struct OutputValue { + std::vector value; + + bool operator==(const OutputValue& other) const { + return value == other.value; + } + + bool operator<(const OutputValue& other) const { + return std::lexicographical_compare( + std::begin(value), + std::end(value), + std::begin(other.value), + std::end(other.value), + [](const ov::Any& a, const ov::Any& b) { + // each element is either a symbol or an integer. in case they differ any integer is less than a symbol. 
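+ // worked example: {3, symbolA} orders before {symbolB, 1} because an integer sorts before any symbol;
+ // two outputs whose values resolve to the same {3, symbolA} compare equal, so save_and_update_value_sources
+ // below keeps only the first such output in its map and re-uses it as the source for later ones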
+ if (a.is>() && b.is>()) + return a.as>() < b.as>(); + if (a.is() && b.is()) + return a.as() < b.as(); + return a.is(); + }); + } + + static ov::optional make(const ov::Output& output) { + auto symbols = output.get_tensor().get_value_symbol(); + if (symbols.empty() || symbols.size() == 1) + return {}; + + const auto& lower_value = ov::util::to_vector(output.get_tensor().get_lower_value()); + const auto& upper_value = ov::util::to_vector(output.get_tensor().get_upper_value()); + const auto& et = output.get_element_type(); + bool use_values = lower_value && upper_value && (et == ov::element::i64 || et == ov::element::i32); + + std::vector symbols_as_any(symbols.size(), nullptr); + for (size_t i = 0; i < symbols_as_any.size(); ++i) { + if (use_values && lower_value->at(i) == upper_value->at(i)) + symbols_as_any[i] = lower_value->at(i); + else if (symbols.at(i) != nullptr) + symbols_as_any[i] = ov::symbol::ancestor_of(symbols.at(i)); + else + return {}; + } + return {OutputValue{std::move(symbols_as_any)}}; + } +}; + +void save_and_update_value_sources(const std::shared_ptr& op, + std::map>& multi_symbol_source) { + for (auto& output : op->outputs()) { + if (output.get_tensor().get_value_symbol().size() < 2) + continue; // singular values are handled by optimize_value_usage helper + + if (auto result = OutputValue::make(output)) { + if (multi_symbol_source.count(*result)) { + auto alternative_source = multi_symbol_source[*result]; + if (output.get_element_type() != alternative_source.get_element_type()) { + auto convert = std::make_shared(alternative_source, output.get_element_type()); + ov::copy_runtime_info(output.get_node_shared_ptr(), convert); + alternative_source = convert->output(0); + } + if (output.get_partial_shape().is_dynamic() || + output.get_partial_shape() != alternative_source.get_partial_shape()) + continue; + output.replace(alternative_source); + } else { + multi_symbol_source[*result] = output; + } + } + } +} + } // namespace bool ov::pass::OptimizeSymbolsUsedAsValues::run_on_model(const std::shared_ptr& m) { RUN_ON_FUNCTION_SCOPE(OptimizeSymbolsUsedAsValues); STS_map symbol_shape_source; STS_map symbol_value_source; + std::map> multi_symbol_source; for (const auto& op : topological_order(m)) { // Result has output port which has shared (during validate_and_infer_type) tensor with input port. // Transformations may replace input of Result. 
After replacement and before Result::validate_and_infer_type -- @@ -375,6 +449,7 @@ bool ov::pass::OptimizeSymbolsUsedAsValues::run_on_model(const std::shared_ptroutputs()) optimize_value_usage(output, symbol_shape_source, symbol_value_source); save_shape_sources(op, symbol_shape_source); + save_and_update_value_sources(op, multi_symbol_source); } return true; } diff --git a/src/common/transformations/tests/common_optimizations/gelu_fusion.cpp b/src/common/transformations/tests/common_optimizations/gelu_fusion.cpp index 837d2ba6d4597e..dbc54f5492bffa 100644 --- a/src/common/transformations/tests/common_optimizations/gelu_fusion.cpp +++ b/src/common/transformations/tests/common_optimizations/gelu_fusion.cpp @@ -388,6 +388,44 @@ TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_epsilon_pow_value) { } } +TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_epsilon_pow_value_2) { + { + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_constant = + std::make_shared(element::f32, Shape{1}, std::vector{3.0f + 1.0e-8f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); + + auto mul_1_constant = + std::make_shared(element::f32, + Shape{1}, + std::vector{static_cast(std::sqrt(2.0 / M_PI))}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); + + auto tanh = std::make_shared(mul_1); + + auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); + auto add_1 = std::make_shared(tanh, add_1_constant); + + auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); + auto mul_2 = std::make_shared(input, mul_2_constant); + + auto mul_3 = std::make_shared(add_1, mul_2); + + model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); + manager.register_pass(); + } + + { + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); + model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); + } +} + TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_wrong_pow_value) { { auto input = std::make_shared(element::f32, Shape{2, 2}); diff --git a/src/common/transformations/tests/common_optimizations/lora_subgraph_fusion.cpp b/src/common/transformations/tests/common_optimizations/lora_subgraph_fusion.cpp new file mode 100644 index 00000000000000..6557f763c6b368 --- /dev/null +++ b/src/common/transformations/tests/common_optimizations/lora_subgraph_fusion.cpp @@ -0,0 +1,245 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/common_optimizations/lora_subgraph_fusion.hpp" + +#include + +#include + +#include "common_test_utils/node_builders/convolution.hpp" +#include "common_test_utils/ov_test_utils.hpp" +#include "openvino/core/model.hpp" +#include "openvino/opsets/opset15.hpp" +#include "ov_ops/lora_subgraph.hpp" +#include "transformations/utils/utils.hpp" + +using namespace testing; +using namespace ov; + +static constexpr auto netType = ov::element::f32; + +std::pair create_states(const std::vector& shapes) { + ov::OutputVector read_values; + ov::SinkVector assigns; + size_t idx = 0; + auto create_state = [&](const ov::PartialShape& shape) { + auto variable = + std::make_shared(ov::op::util::VariableInfo{shape, netType, std::to_string(idx++)}); + auto read_value = std::make_shared(variable); + auto 
assign = std::make_shared(read_value, variable); + read_values.push_back(read_value); + assigns.push_back(assign); + }; + for (const auto& shape : shapes) + create_state(shape); + return std::make_pair(read_values, assigns); +} + +std::shared_ptr create_lora_subgraph(const ov::Output& main_flow, + const ov::Output& lora_input, + const ov::OutputVector& states, + bool add_transposes, + size_t mul_read_value_idx = 1, + size_t add_data_flow_idx = 0) { + OPENVINO_ASSERT(states.size() == 3, "get_lora_subgraph expects states size == 3"); + OPENVINO_ASSERT(mul_read_value_idx == 0 || mul_read_value_idx == 1, "mul_read_value_idx must be 0 or 1"); + OPENVINO_ASSERT(add_data_flow_idx == 0 || add_data_flow_idx == 1, "add_data_flow_idx must be 0 or 1"); + + auto create_transpose = [](const ov::Output& input) -> ov::Output { + auto constant = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{4}, {2, 3, 0, 1}); + return std::make_shared(input, constant); + }; + + const auto& mm1_input = add_transposes ? create_transpose(lora_input) : lora_input; + auto mm1 = std::make_shared(mm1_input, states[0], false, true); + + const auto& mul_in_0 = mul_read_value_idx == 0 ? states[1] : mm1->output(0); + const auto& mul_in_1 = mul_read_value_idx == 0 ? mm1->output(0) : states[1]; + auto mul = std::make_shared(mul_in_0, mul_in_1); + + auto mm2 = std::make_shared(mul, states[2], false, true); + + const auto& add_sec_input = add_transposes ? create_transpose(mm2) : mm2; + const auto& add_in_0 = add_data_flow_idx == 0 ? main_flow : add_sec_input; + const auto& add_in_1 = add_data_flow_idx == 0 ? add_sec_input : main_flow; + return std::make_shared(add_in_0, add_in_1); +} + +class LoraSubgraphFusionTests : public TransformationTestsF { +public: + LoraSubgraphFusionTests() : TransformationTestsF() { + // TODO: remove when these flags will be enabled in TransformationTestsF (ticket XXX-98039) + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::NAMES); + } + + void SetUp() override { + TransformationTestsF::SetUp(); + manager.register_pass(); + } +}; + +class LoraSubgraphFusionMatMulTests : public LoraSubgraphFusionTests { +public: + const ov::Dimension K = 563; + const ov::Dimension N = 2048; + ov::PartialShape shape_x = {-1, -1, K}; + ov::PartialShape shape_w = {N, K}; + ov::PartialShape shape_state_1 = {-1, K}; + ov::PartialShape shape_state_2 = {1, -1}; + ov::PartialShape shape_state_3 = {N, -1}; +}; + +TEST_F(LoraSubgraphFusionMatMulTests, StandardPattern) { + { + auto param_lora = std::make_shared(netType, shape_x); + auto param_w = std::make_shared(netType, shape_w); + auto main_mm = std::make_shared(param_lora, param_w, false, true); + main_mm->set_friendly_name("main_mm"); + auto states = create_states({shape_state_1, shape_state_2, shape_state_3}); + auto lora_subgraph = create_lora_subgraph(main_mm, param_lora, states.first, false); + lora_subgraph->set_friendly_name("lora_subgraph"); + model = std::make_shared(OutputVector{lora_subgraph, main_mm}, + states.second, + ParameterVector{param_lora, param_w}); + } + { + auto param_lora = std::make_shared(netType, shape_x); + auto param_w = std::make_shared(netType, shape_w); + auto main_mm = std::make_shared(param_lora, param_w, false, true); + main_mm->set_friendly_name("main_mm"); + + auto inner_param_lora = std::make_shared(netType, shape_x); + auto inner_state_1 = std::make_shared(netType, shape_state_1); + auto inner_state_2 = 
std::make_shared(netType, shape_state_2); + auto inner_state_3 = std::make_shared(netType, shape_state_3); + auto inner_param_mm = std::make_shared(netType, main_mm->get_output_partial_shape(0)); + + ov::OutputVector states_outs{inner_state_1, inner_state_2, inner_state_3}; + auto lora_subgraph = create_lora_subgraph(inner_param_mm, inner_param_lora, states_outs, false); + lora_subgraph->set_friendly_name("lora_subgraph"); + ov::ParameterVector inner_params{inner_param_mm, inner_param_lora, inner_state_1, inner_state_2, inner_state_3}; + auto inner_model = std::make_shared(OutputVector{lora_subgraph}, inner_params); + + auto states = create_states({shape_state_1, shape_state_2, shape_state_3}); + ov::OutputVector lora_inputs{main_mm, param_lora, states.first[0], states.first[1], states.first[2]}; + auto lora = std::make_shared(lora_inputs, inner_model); + lora->set_friendly_name("lora_subgraph"); + + model_ref = + std::make_shared(OutputVector{lora, main_mm}, states.second, ParameterVector{param_lora, param_w}); + } +} + +TEST_F(LoraSubgraphFusionMatMulTests, ReshaffledEltwiseInputs) { + { + auto param_lora = std::make_shared(netType, shape_x); + auto param_w = std::make_shared(netType, shape_w); + auto main_mm = std::make_shared(param_lora, param_w, false, true); + main_mm->set_friendly_name("main_mm"); + + auto states = create_states({shape_state_1, shape_state_2, shape_state_3}); + auto lora_subgraph = create_lora_subgraph(main_mm, param_lora, states.first, false, 0, 1); + lora_subgraph->set_friendly_name("lora_subgraph"); + + model = std::make_shared(OutputVector{lora_subgraph, main_mm}, + states.second, + ParameterVector{param_lora, param_w}); + } + { + auto param_lora = std::make_shared(netType, shape_x); + auto param_w = std::make_shared(netType, shape_w); + auto main_mm = std::make_shared(param_lora, param_w, false, true); + main_mm->set_friendly_name("main_mm"); + + auto inner_param_lora = std::make_shared(netType, shape_x); + auto inner_state_1 = std::make_shared(netType, shape_state_1); + auto inner_state_2 = std::make_shared(netType, shape_state_2); + auto inner_state_3 = std::make_shared(netType, shape_state_3); + auto inner_param_mm = std::make_shared(netType, main_mm->get_output_partial_shape(0)); + + ov::OutputVector states_outs{inner_state_1, inner_state_2, inner_state_3}; + auto lora_subgraph = create_lora_subgraph(inner_param_mm, inner_param_lora, states_outs, false, 0, 1); + lora_subgraph->set_friendly_name("lora_subgraph"); + ov::ParameterVector inner_params{inner_param_mm, inner_param_lora, inner_state_1, inner_state_2, inner_state_3}; + auto inner_model = std::make_shared(OutputVector{lora_subgraph}, inner_params); + + auto states = create_states({shape_state_1, shape_state_2, shape_state_3}); + ov::OutputVector lora_inputs{main_mm, param_lora, states.first[0], states.first[1], states.first[2]}; + auto lora = std::make_shared(lora_inputs, inner_model); + lora->set_friendly_name("lora_subgraph"); + + model_ref = + std::make_shared(OutputVector{lora, main_mm}, states.second, ParameterVector{param_lora, param_w}); + } +} + +class LoraSubgraphFusionConvolutionTests : public LoraSubgraphFusionTests { +public: + const ov::Dimension num_channels = 320; + ov::PartialShape shape_x = {-1, num_channels, -1, -1}; + ov::PartialShape shape_state_1 = {-1, num_channels}; + ov::PartialShape shape_state_2 = {1, -1}; + ov::PartialShape shape_state_3 = {num_channels, -1}; +}; + +TEST_F(LoraSubgraphFusionConvolutionTests, StandardPattern) { + { + auto param_lora = 
std::make_shared(netType, shape_x); + auto main_conv = ov::test::utils::make_convolution(param_lora, + netType, + {1, 1}, + {1, 1}, + {0, 0}, + {0, 0}, + {1, 1}, + ov::op::PadType::EXPLICIT, + num_channels.get_length()); + main_conv->set_friendly_name("main_conv"); + auto states = create_states({shape_state_1, shape_state_2, shape_state_3}); + auto lora_subgraph = create_lora_subgraph(main_conv, param_lora, states.first, true); + lora_subgraph->set_friendly_name("lora_subgraph"); + model = + std::make_shared(OutputVector{lora_subgraph, main_conv}, states.second, ParameterVector{param_lora}); + } + { + auto param_lora = std::make_shared(netType, shape_x); + auto main_conv = ov::test::utils::make_convolution(param_lora, + netType, + {1, 1}, + {1, 1}, + {0, 0}, + {0, 0}, + {1, 1}, + ov::op::PadType::EXPLICIT, + num_channels.get_length()); + main_conv->set_friendly_name("main_conv"); + + auto inner_param_lora = std::make_shared(netType, shape_x); + auto inner_state_1 = std::make_shared(netType, shape_state_1); + auto inner_state_2 = std::make_shared(netType, shape_state_2); + auto inner_state_3 = std::make_shared(netType, shape_state_3); + auto inner_param_conv = + std::make_shared(netType, main_conv->get_output_partial_shape(0)); + + ov::OutputVector states_outs{inner_state_1, inner_state_2, inner_state_3}; + auto lora_subgraph = create_lora_subgraph(inner_param_conv, inner_param_lora, states_outs, true); + lora_subgraph->set_friendly_name("lora_subgraph"); + ov::ParameterVector inner_params{inner_param_conv, + inner_param_lora, + inner_state_1, + inner_state_2, + inner_state_3}; + auto inner_model = std::make_shared(OutputVector{lora_subgraph}, inner_params); + + auto states = create_states({shape_state_1, shape_state_2, shape_state_3}); + ov::OutputVector lora_inputs{main_conv, param_lora, states.first[0], states.first[1], states.first[2]}; + auto lora = std::make_shared(lora_inputs, inner_model); + lora->set_friendly_name("lora_subgraph"); + + model_ref = std::make_shared(OutputVector{lora, main_conv}, states.second, ParameterVector{param_lora}); + } +} diff --git a/src/common/transformations/tests/common_optimizations/shared_ops_optimization.cpp b/src/common/transformations/tests/common_optimizations/shared_ops_optimization.cpp index 3f717726b757bc..dcba44313c8c7e 100644 --- a/src/common/transformations/tests/common_optimizations/shared_ops_optimization.cpp +++ b/src/common/transformations/tests/common_optimizations/shared_ops_optimization.cpp @@ -6,6 +6,7 @@ #include #include "common_test_utils/ov_test_utils.hpp" +#include "openvino/op/add.hpp" #include "openvino/op/broadcast.hpp" #include "openvino/op/ceiling.hpp" #include "openvino/op/concat.hpp" @@ -716,3 +717,57 @@ TEST_F(SharedTransformationTestsF, SharedMaxPool) { model_ref = std::make_shared(OutputVector{concat}, ParameterVector{data}); } } + +TEST_F(SharedTransformationTestsF, TopologicalOrder) { + { + auto data = std::make_shared(element::f32, PartialShape{-1, -1, -1, -1}); + + auto shape_of = std::make_shared(data); + + auto gather_0 = std::make_shared(shape_of, + v0::Constant::create(element::i32, {1}, {0}), + v0::Constant::create(element::i32, {}, {0})); + + auto gather_1 = std::make_shared(shape_of, + v0::Constant::create(element::i32, {1}, {0}), + v0::Constant::create(element::i32, {}, {0})); + + auto gather_2 = std::make_shared(shape_of, + v0::Constant::create(element::i32, {1}, {0}), + v0::Constant::create(element::i32, {}, {0})); + + auto add_0 = std::make_shared(gather_0, gather_0); + auto add_1 = 
std::make_shared(gather_1, gather_1); + auto add_2 = std::make_shared(gather_2, gather_2); + + auto concat_0 = + std::make_shared(OutputVector{gather_0, add_0, v0::Constant::create(element::i64, {1}, {0})}, + 0); + auto concat_1 = + std::make_shared(OutputVector{gather_1, add_1, v0::Constant::create(element::i64, {1}, {0})}, + 0); + auto concat_2 = + std::make_shared(OutputVector{gather_2, add_2, v0::Constant::create(element::i64, {1}, {0})}, + 0); + + auto concat = std::make_shared(OutputVector{concat_0, concat_1}, 0); + auto output = std::make_shared(OutputVector{concat, concat_2}, 0); + + model = std::make_shared(OutputVector{output}, ParameterVector{data}); + manager.register_pass(); + } + { + auto data = std::make_shared(element::f32, PartialShape{-1, -1, -1, -1}); + auto shape_of = std::make_shared(data); + auto gather_0 = std::make_shared(shape_of, + v0::Constant::create(element::i32, {1}, {0}), + v0::Constant::create(element::i32, {}, {0})); + auto add_0 = std::make_shared(gather_0, gather_0); + auto concat_0 = + std::make_shared(OutputVector{gather_0, add_0, v0::Constant::create(element::i64, {1}, {0})}, + 0); + auto concat = std::make_shared(OutputVector{concat_0, concat_0}, 0); + auto output = std::make_shared(OutputVector{concat, concat_0}, 0); + model_ref = std::make_shared(OutputVector{output}, ParameterVector{data}); + } +} diff --git a/src/common/transformations/tests/op_conversions/convert_slicescatter_decomposition_test.cpp b/src/common/transformations/tests/op_conversions/convert_slicescatter_decomposition_test.cpp new file mode 100644 index 00000000000000..c3548128403624 --- /dev/null +++ b/src/common/transformations/tests/op_conversions/convert_slicescatter_decomposition_test.cpp @@ -0,0 +1,145 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include + +#include "common_test_utils/ov_test_utils.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/opsets/opset15.hpp" +#include "openvino/opsets/opset8.hpp" +#include "openvino/pass/manager.hpp" +#include "transformations/op_conversions/convert_slicescatter.hpp" +#include "transformations/utils/utils.hpp" +namespace { +class ConvertSliceScatterTest : public TransformationTestsF, public testing::WithParamInterface { +private: + void SetUp() override { + TransformationTestsF::SetUp(); + const auto& inputs = GetParam(); + manager.register_pass(); + model = create_v15_model(inputs); + model_ref = create_decomposed_model(inputs); + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); + comparator.enable(FunctionsComparator::CmpValues::NAMES); + } + +protected: + std::shared_ptr create_v15_model(ov::NodeVector inputs) { + const auto& data = inputs.at(0); + const auto& updates = inputs.at(1); + const auto& start = inputs.at(2); + const auto& stop = inputs.at(3); + const auto& step = inputs.at(4); + ov::ParameterVector params{}; + for (const auto& inp : inputs) { + const auto& param = ov::as_type_ptr(inp); + if (param) { + params.push_back(param); + } + } + std::shared_ptr slicescatter; + if (inputs.size() == 5) { + slicescatter = std::make_shared(data, updates, start, stop, step); + } else { + slicescatter = std::make_shared(data, updates, start, stop, step, inputs.at(5)); + } + slicescatter->set_friendly_name("slicescatter15"); + return std::make_shared(slicescatter->outputs(), params); + } + + std::shared_ptr create_decomposed_model(ov::NodeVector inputs) { + const auto& data = 
inputs.at(0); + const auto& updates = inputs.at(1); + const auto& start = inputs.at(2); + const auto& stop = inputs.at(3); + const auto& step = inputs.at(4); + ov::ParameterVector params{}; + for (const auto& inp : inputs) { + const auto& param = ov::as_type_ptr(inp); + if (param) { + params.push_back(param); + } + } + const auto& const_0 = ov::op::v0::Constant::create(ov::element::i64, {}, {0}); + const auto& const_1 = ov::op::v0::Constant::create(ov::element::i64, {}, {1}); + const auto& const_1d_neg_1 = ov::op::v0::Constant::create(ov::element::i64, {1}, {-1}); + const auto& const_scatter_indices_shape = ov::op::v0::Constant::create(ov::element::i64, {2}, {-1, 1}); + const auto& data_shape = std::make_shared(data, ov::element::i64); + const auto& num_elements_data = std::make_shared(data_shape, const_0, false); + const auto& data_indices_flatten = + std::make_shared(const_0, num_elements_data, const_1, ov::element::i64); + const auto& full_data_indices = std::make_shared(data_indices_flatten, data_shape, false); + std::shared_ptr slice_indices; + if (inputs.size() == 5) { + slice_indices = std::make_shared(full_data_indices, start, stop, step); + } else { + slice_indices = std::make_shared(full_data_indices, start, stop, step, inputs.at(5)); + } + const auto& slice_indices_flatten = + std::make_shared(slice_indices, const_scatter_indices_shape, false); + const auto& updates_flatten = std::make_shared(updates, const_1d_neg_1, false); + const auto& data_flatten = std::make_shared(data, const_1d_neg_1, false); + const auto& output_flatten = + std::make_shared(data_flatten, slice_indices_flatten, updates_flatten); + const auto& slicescatter = std::make_shared(output_flatten, data_shape, false); + slicescatter->set_friendly_name("slicescatter15"); + return std::make_shared(slicescatter->outputs(), params); + } +}; + +INSTANTIATE_TEST_SUITE_P( + ConvertSliceScatterDecomposition, + ConvertSliceScatterTest, + testing::Values( + ov::NodeVector{ + std::make_shared(ov::element::f32, ov::Shape{256, 10, 15}), + std::make_shared(ov::element::f32, ov::Shape{4, 7, 2}), + ov::op::v0::Constant::create(ov::element::i32, {3}, {2, -15, 25}), + ov::op::v0::Constant::create(ov::element::i32, {3}, {9, 7, -3}), + ov::op::v0::Constant::create(ov::element::i32, {3}, {2, 1, -1}), + ov::op::v0::Constant::create(ov::element::i32, {3}, {0, 1, -1}), + }, + ov::NodeVector{ + std::make_shared(ov::element::f32, ov::Shape{256, 10, 15}), + std::make_shared(ov::element::f32, ov::Shape{4, 7, 2}), + ov::op::v0::Constant::create(ov::element::i32, {3}, {2, -15, 25}), + ov::op::v0::Constant::create(ov::element::i32, {3}, {9, 7, -3}), + ov::op::v0::Constant::create(ov::element::i32, {3}, {2, 1, -1}), + }, + ov::NodeVector{ + std::make_shared(ov::element::i32, ov::PartialShape::dynamic()), + std::make_shared(ov::element::i32, ov::PartialShape::dynamic()), + ov::op::v0::Constant::create(ov::element::i32, {3}, {2, -15, 25}), + ov::op::v0::Constant::create(ov::element::i32, {3}, {9, 7, -3}), + ov::op::v0::Constant::create(ov::element::i32, {3}, {2, 1, -1}), + ov::op::v0::Constant::create(ov::element::i32, {3}, {0, 1, -1}), + }, + ov::NodeVector{ + std::make_shared(ov::element::i32, ov::PartialShape::dynamic()), + std::make_shared(ov::element::i32, ov::PartialShape::dynamic()), + ov::op::v0::Constant::create(ov::element::i32, {3}, {2, -15, 25}), + ov::op::v0::Constant::create(ov::element::i32, {3}, {9, 7, -3}), + ov::op::v0::Constant::create(ov::element::i32, {3}, {2, 1, -1}), + }, + ov::NodeVector{ + 
std::make_shared(ov::element::i32, ov::PartialShape::dynamic()), + std::make_shared(ov::element::i32, ov::PartialShape::dynamic()), + std::make_shared(ov::element::i32, ov::PartialShape::dynamic()), + std::make_shared(ov::element::i32, ov::PartialShape::dynamic()), + std::make_shared(ov::element::i32, ov::PartialShape::dynamic()), + std::make_shared(ov::element::i32, ov::PartialShape::dynamic()), + }, + ov::NodeVector{ + std::make_shared(ov::element::i32, ov::PartialShape::dynamic()), + std::make_shared(ov::element::i32, ov::PartialShape::dynamic()), + std::make_shared(ov::element::i32, ov::PartialShape::dynamic()), + std::make_shared(ov::element::i32, ov::PartialShape::dynamic()), + std::make_shared(ov::element::i32, ov::PartialShape::dynamic()), + })); +TEST_P(ConvertSliceScatterTest, CompareFunctions) {} + +} // namespace diff --git a/src/common/transformations/tests/op_conversions/convert_squeeze15_downgrade_test.cpp b/src/common/transformations/tests/op_conversions/convert_squeeze15_downgrade_test.cpp new file mode 100644 index 00000000000000..f3d90ab2c748bd --- /dev/null +++ b/src/common/transformations/tests/op_conversions/convert_squeeze15_downgrade_test.cpp @@ -0,0 +1,112 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/op_conversions/convert_squeeze15_downgrade.hpp" + +#include + +#include + +#include "common_test_utils/ov_test_utils.hpp" +#include "openvino/opsets/opset1.hpp" +#include "openvino/opsets/opset15.hpp" +#include "openvino/pass/manager.hpp" +#include "transformations/utils/utils.hpp" +using namespace ov; +using namespace testing; + +namespace { + +enum class IndicesMode { NONE, CONST, PARAM }; + +std::shared_ptr create_v15_model(const IndicesMode indices_mode, + const std::vector indices_const_val, + const bool allow_axis_skip) { + const PartialShape data_shape{-1, {2, 5}, 1, {1, 5}, 4}; + const auto& data = std::make_shared(ov::element::f32, data_shape); + ov::ParameterVector params = {data}; + std::shared_ptr squeeze; + if (indices_mode == IndicesMode::NONE) { + squeeze = std::make_shared(data, allow_axis_skip); + } else if (indices_mode == IndicesMode::PARAM) { + const auto& indices = + std::make_shared(ov::element::i32, PartialShape({data_shape.rank()})); + params.push_back(indices); + squeeze = std::make_shared(data, indices, allow_axis_skip); + } else if (indices_mode == IndicesMode::CONST) { + const auto& indices = + ov::opset15::Constant::create(ov::element::i32, Shape({indices_const_val.size()}), indices_const_val); + squeeze = std::make_shared(data, indices, allow_axis_skip); + } + squeeze->set_friendly_name("squeeze15"); + return std::make_shared(squeeze->outputs(), params); +} + +std::shared_ptr create_v1_model(const IndicesMode indices_mode, const std::vector indices_const_val) { + const PartialShape data_shape{-1, {2, 5}, 1, {1, 5}, 4}; + const auto& data = std::make_shared(ov::element::f32, data_shape); + ov::ParameterVector params = {data}; + std::shared_ptr squeeze; + if (indices_mode == IndicesMode::NONE) { + squeeze = std::make_shared(data); + } else if (indices_mode == IndicesMode::PARAM) { + const auto& indices = + std::make_shared(ov::element::i32, PartialShape({data_shape.rank()})); + params.push_back(indices); + squeeze = std::make_shared(data, indices); + } else if (indices_mode == IndicesMode::CONST) { + const auto& indices = + ov::opset1::Constant::create(ov::element::i32, Shape({indices_const_val.size()}), indices_const_val); + squeeze = std::make_shared(data, 
indices); + } + squeeze->set_friendly_name("squeeze15"); + return std::make_shared(squeeze->outputs(), params); +} + +} // namespace + +TEST_F(TransformationTestsF, ConvertSqueeze15ToSqueeze1_no_indices_no_skip) { + manager.register_pass(); + model = create_v15_model(IndicesMode::NONE, {}, false); + model_ref = create_v1_model(IndicesMode::NONE, {}); + EXPECT_EQ(model->output(0).get_partial_shape(), model_ref->output(0).get_partial_shape()); + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); + comparator.enable(FunctionsComparator::CmpValues::NAMES); +} + +TEST_F(TransformationTestsF, ConvertSqueeze15ToSqueeze1_no_indices_skip) { + manager.register_pass(); + model = create_v15_model(IndicesMode::NONE, {}, true); + model_ref = create_v1_model(IndicesMode::NONE, {}); + EXPECT_EQ(model->output(0).get_partial_shape(), model_ref->output(0).get_partial_shape()); + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); + comparator.enable(FunctionsComparator::CmpValues::NAMES); +} + +TEST_F(TransformationTestsF, ConvertSqueeze15ToSqueeze1_const_indices_no_skip) { + manager.register_pass(); + model = create_v15_model(IndicesMode::CONST, {0, -4, 3}, false); + model_ref = create_v1_model(IndicesMode::CONST, {0, -4, 3}); + EXPECT_EQ(model->output(0).get_partial_shape(), model_ref->output(0).get_partial_shape()); + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); + comparator.enable(FunctionsComparator::CmpValues::NAMES); +} + +TEST_F(TransformationTestsF, ConvertSqueeze15ToSqueeze1_dynamic_indices_no_skip) { + manager.register_pass(); + model = create_v15_model(IndicesMode::PARAM, {}, false); + model_ref = create_v1_model(IndicesMode::PARAM, {}); + EXPECT_EQ(model->output(0).get_partial_shape(), model_ref->output(0).get_partial_shape()); + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); + comparator.enable(FunctionsComparator::CmpValues::NAMES); +} + +TEST_F(TransformationTestsF, ConvertSqueeze15ToSqueeze1_unsupported_skip) { + manager.register_pass(); + model = create_v15_model(IndicesMode::PARAM, {}, true); +} diff --git a/src/common/transformations/tests/symbolic_transformations/symbol_optimization.cpp b/src/common/transformations/tests/symbolic_transformations/symbol_optimization.cpp index 590290fb19d84b..e4653ec084bafb 100644 --- a/src/common/transformations/tests/symbolic_transformations/symbol_optimization.cpp +++ b/src/common/transformations/tests/symbolic_transformations/symbol_optimization.cpp @@ -9,11 +9,15 @@ #include "common_test_utils/ov_test_utils.hpp" #include "openvino/op/add.hpp" #include "openvino/op/concat.hpp" +#include "openvino/op/convert.hpp" #include "openvino/op/gather.hpp" +#include "openvino/op/range.hpp" #include "openvino/op/reshape.hpp" #include "openvino/op/shape_of.hpp" +#include "openvino/op/squeeze.hpp" #include "openvino/pass/manager.hpp" #include "openvino/pass/visualize_tree.hpp" +#include "transformations/common_optimizations/shared_ops_optimization.hpp" #include "transformations/symbolic_transformations/symbolic_optimizations.hpp" #include "transformations/symbolic_transformations/utils.hpp" @@ -95,3 +99,152 @@ TEST_F(TransformationTestsF, ApplySymbolEquivalence_Concat_Values) { model_ref = make_shared(NodeVector{reshape}, ParameterVector{input_2, 
input_1}); } } + +Output get_dim_by_idx(const Output& source, const int64_t& idx, element::Type type = element::i64) { + auto shape = make_shared(source, type); + auto gather = make_shared(shape, + v0::Constant::create(element::i64, {}, {idx}), + v0::Constant::create(element::i64, {}, {0})); + return gather->output(0); +} + +Output get_dim_by_idx(const Output& source, + initializer_list idx, + element::Type type = element::i64) { + auto shape = make_shared(source, type); + auto gather = make_shared(shape, + v0::Constant::create(element::i64, {idx.size()}, idx), + v0::Constant::create(element::i64, {}, {0})); + return gather->output(0); +} + +TEST_F(TransformationTestsF, ValueOptimizationSingleValue) { + { + auto input = make_shared(element::f32, PartialShape::dynamic(4)); + + auto dim_0 = get_dim_by_idx(input, {-1}, element::i64); + auto dim_1 = get_dim_by_idx(input, {3}, element::i32); + auto dim_2 = get_dim_by_idx(input, -1, element::i32); + + auto reshape_0 = make_shared( + input, + make_shared(OutputVector{v0::Constant::create(element::i64, {1}, {-1}), dim_0}, 0), + false); + auto reshape_1 = make_shared( + input, + make_shared(OutputVector{v0::Constant::create(element::i32, {1}, {0}), dim_1}, 0), + false); + auto range = make_shared(v0::Constant::create(element::i32, {}, {0}), + dim_2, + v0::Constant::create(element::i32, {}, {1}), + element::i32); + + model = make_shared(NodeVector{reshape_0, reshape_1, range}, ParameterVector{input}); + + manager.set_per_pass_validation(false); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + } + { + auto input = make_shared(element::f32, PartialShape::dynamic(4)); + auto dim_1 = get_dim_by_idx(input, {3}, element::i32); + auto dim_0 = std::make_shared(dim_1, element::i64); + auto dim_2 = std::make_shared(dim_1); + auto reshape_0 = make_shared( + input, + make_shared(OutputVector{v0::Constant::create(element::i64, {1}, {-1}), dim_0}, 0), + false); + auto reshape_1 = make_shared( + input, + make_shared(OutputVector{v0::Constant::create(element::i32, {1}, {0}), dim_1}, 0), + false); + auto range = make_shared(v0::Constant::create(element::i32, {}, {0}), + dim_2, + v0::Constant::create(element::i32, {}, {1}), + element::i32); + + model_ref = make_shared(NodeVector{reshape_0, reshape_1, range}, ParameterVector{input}); + } + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); +} + +TEST_F(TransformationTestsF, ValueOptimizationDoubleValue) { + { + auto input = make_shared(element::f32, PartialShape::dynamic(4)); + + auto dim_0 = get_dim_by_idx(input, {-1, -2}, element::i64); + auto dim_1 = get_dim_by_idx(input, {3, 2}, element::i32); + + auto reshape_0 = make_shared( + input, + make_shared(OutputVector{v0::Constant::create(element::i64, {1}, {-1}), dim_0}, 0), + false); + auto reshape_1 = make_shared( + input, + make_shared(OutputVector{v0::Constant::create(element::i32, {1}, {0}), dim_1}, 0), + false); + + model = make_shared(NodeVector{reshape_0, reshape_1}, ParameterVector{input}); + + manager.set_per_pass_validation(false); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + } + { + auto input = make_shared(element::f32, PartialShape::dynamic(4)); + auto dim_0 = get_dim_by_idx(input, {3, 2}, element::i32); + auto dim_1 = std::make_shared(dim_0, element::i64); + + auto reshape_0 = make_shared( + input, + make_shared(OutputVector{v0::Constant::create(element::i64, {1}, {-1}), dim_1}, 0), + false); + auto reshape_1 = make_shared( + input, + 
make_shared(OutputVector{v0::Constant::create(element::i32, {1}, {0}), dim_0}, 0), + false); + + model_ref = make_shared(NodeVector{reshape_0, reshape_1}, ParameterVector{input}); + } + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); +} + +TEST_F(TransformationTestsF, ValueOptimizationSymbolAndValue) { + { + auto input = make_shared(element::f32, PartialShape({-1, -1, 4, -1})); + + auto dim_0 = get_dim_by_idx(input, {-1, -2}, element::i64); + auto dim_1 = get_dim_by_idx(input, {3, 2}, element::i32); + + auto reshape_0 = make_shared( + input, + make_shared(OutputVector{v0::Constant::create(element::i64, {1}, {-1}), dim_0}, 0), + false); + auto reshape_1 = make_shared( + input, + make_shared(OutputVector{v0::Constant::create(element::i32, {1}, {-1}), dim_1}, 0), + false); + + model = make_shared(NodeVector{reshape_0, reshape_1}, ParameterVector{input}); + + manager.set_per_pass_validation(false); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + } + { + auto input = make_shared(element::f32, PartialShape({-1, -1, 4, -1})); + auto dim_0 = make_shared( + OutputVector{v0::Constant::create(element::i32, {1}, {-1}), get_dim_by_idx(input, {3, 2}, element::i32)}, + 0); + auto dim_1 = std::make_shared(dim_0, element::i64); + + auto reshape_0 = make_shared(input, dim_1, false); + auto reshape_1 = make_shared(input, dim_0, false); + + model_ref = make_shared(NodeVector{reshape_0, reshape_1}, ParameterVector{input}); + } + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); +} diff --git a/src/common/transformations/tests/utils/convert_precision.cpp b/src/common/transformations/tests/utils/convert_precision.cpp index 03095b28d96a74..2aa4d4d2fac9e9 100644 --- a/src/common/transformations/tests/utils/convert_precision.cpp +++ b/src/common/transformations/tests/utils/convert_precision.cpp @@ -15,6 +15,7 @@ #include "openvino/core/model.hpp" #include "openvino/opsets/opset1.hpp" #include "openvino/opsets/opset10.hpp" +#include "openvino/opsets/opset15.hpp" #include "openvino/opsets/opset3.hpp" #include "openvino/opsets/opset4.hpp" #include "openvino/opsets/opset5.hpp" @@ -963,6 +964,34 @@ TEST(TransformationTests, ConvertPrecision_Select) { ASSERT_TRUE(has_type(f)); } +TEST(TransformationTests, ConvertPrecision_Select_Relaxed) { + std::shared_ptr f(nullptr); + { + auto input1 = std::make_shared(element::boolean, Shape{15, 20, 3}); + auto node = std::make_shared(input1); + auto select = std::make_shared(node, input1, input1); + + f = std::make_shared(OutputVector{select}, ParameterVector{input1}); + + // Explicitly setting the element type of a node to a different one to + // test the appearance of TypeRelaxed within Select + node->set_output_type(0, ov::element::u8, node->get_output_partial_shape(0)); + + pass::Manager manager; + manager.register_pass(); + manager.register_pass(precisions_map{{element::u8, element::boolean}}); + manager.run_passes(f); + } + OV_ASSERT_NO_THROW(check_rt_info(f)); + ASSERT_FALSE(has_type(f)); + ASSERT_TRUE(has_type(f)); + int counter = 0; + for (const auto& node : f->get_ordered_ops()) + if (std::dynamic_pointer_cast(node)) + ++counter; + ASSERT_EQ(counter, 1); +} + TEST(TransformationTests, ConvertPrecision_TypeRelaxedWithSelect) { std::shared_ptr f(nullptr); { @@ -1008,6 +1037,28 @@ TEST(TransformationTests, ConvertPrecision_TypeRelaxed) { } } +TEST(TransformationTests, ConvertPrecision_SearchSorted) { + std::shared_ptr f(nullptr); + { + auto search_sorted_input = opset15::Constant::create(ov::element::i64, {5}, 
{1, 2, 3, 4, 5}); + auto indices = std::make_shared(ov::element::i64, Shape{3}); + auto search_sorted = std::make_shared(search_sorted_input, indices); + + auto less_input = opset15::Constant::create(ov::element::i64, {3}, {4, 5, 6}); + auto less = std::make_shared(search_sorted, less_input); + + f = std::make_shared(OutputVector{less}, ParameterVector{indices}); + + pass::Manager manager; + manager.register_pass(); + manager.register_pass(precisions_map{{element::i64, element::i32}}); + manager.run_passes(f); + } + OV_ASSERT_NO_THROW(check_rt_info(f)); + ASSERT_FALSE(has_type(f)); + ASSERT_TRUE(has_type(f)); +} + TEST(TransformationTests, ConvertPrecision_Variables) { std::shared_ptr f(nullptr); { diff --git a/src/common/util/include/openvino/util/weights_path.hpp b/src/common/util/include/openvino/util/weights_path.hpp new file mode 100644 index 00000000000000..db97484be98d35 --- /dev/null +++ b/src/common/util/include/openvino/util/weights_path.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/util/common_util.hpp" + +namespace ov { +namespace util { + +bool validate_weights_path(std::string& weights_path); + +} // namespace ov +} // namespace util diff --git a/src/common/util/src/weights_path.cpp b/src/common/util/src/weights_path.cpp new file mode 100644 index 00000000000000..9cf2336f064dd0 --- /dev/null +++ b/src/common/util/src/weights_path.cpp @@ -0,0 +1,14 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + + +#include "openvino/util/weights_path.hpp" + +bool ov::util::validate_weights_path(std::string& weights_path) { + if (weights_path.empty() || !ov::util::ends_with(weights_path, ".bin")) { + return false; + } + + return true; +} diff --git a/src/core/include/openvino/core/parallel.hpp b/src/core/include/openvino/core/parallel.hpp index 6d3a243c95e7dc..a231c6833f9d84 100644 --- a/src/core/include/openvino/core/parallel.hpp +++ b/src/core/include/openvino/core/parallel.hpp @@ -461,8 +461,10 @@ void parallel_for(const T0& D0, const F& func) { for_1d(ithr, nthr, D0, func); }); #elif OV_THREAD == OV_THREAD_OMP +// Please note that this function does not guarantee execution on the same number of threads from call to call. +// Use the parallel_nt* functions if the procedure depends on a certain number of threads. # pragma omp parallel - for_1d(parallel_get_thread_num(), parallel_get_num_threads(), D0, func); + { for_1d(parallel_get_thread_num(), parallel_get_num_threads(), D0, func); } #elif OV_THREAD == OV_THREAD_SEQ for_1d(0, 1, D0, func); #endif @@ -509,8 +511,10 @@ void parallel_for2d(const T0& D0, const T1& D1, const F& func) { for_2d(ithr, nthr, D0, D1, func); }); #elif OV_THREAD == OV_THREAD_OMP +// Please note that this function does not guarantee execution on the same number of threads from call to call. +// Use the parallel_nt* functions if the procedure depends on a certain number of threads. # pragma omp parallel - for_2d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, func); + { for_2d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, func); } #elif OV_THREAD == OV_THREAD_SEQ for_2d(0, 1, D0, D1, func); #endif @@ -575,8 +579,10 @@ void parallel_for3d(const T0& D0, const T1& D1, const T2& D2, const F& func) { for_3d(ithr, nthr, D0, D1, D2, func); }); #elif OV_THREAD == OV_THREAD_OMP +// Please note that this function does not guarantee execution on the same number of threads from call to call. 
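+// (With OpenMP, the runtime may form a parallel team of a different size for each region, for example when dynamic thread adjustment is enabled.)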
+// Use the parallel_nt* functions if the procedure depends on a certain number of threads. # pragma omp parallel - for_3d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, D2, func); + { for_3d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, D2, func); } #elif OV_THREAD == OV_THREAD_SEQ for_3d(0, 1, D0, D1, D2, func); #endif @@ -645,8 +651,10 @@ void parallel_for4d(const T0& D0, const T1& D1, const T2& D2, const T3& D3, cons for_4d(ithr, nthr, D0, D1, D2, D3, func); }); #elif OV_THREAD == OV_THREAD_OMP +// Please note that this function does not guarantee execution on the same number of threads from call to call. +// Use the parallel_nt* functions if the procedure depends on a certain number of threads. # pragma omp parallel - for_4d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, D2, D3, func); + { for_4d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, D2, D3, func); } #elif OV_THREAD == OV_THREAD_SEQ for_4d(0, 1, D0, D1, D2, D3, func); #endif @@ -703,8 +711,10 @@ void parallel_for5d(const T0& D0, const T1& D1, const T2& D2, const T3& D3, cons for_5d(ithr, nthr, D0, D1, D2, D3, D4, func); }); #elif OV_THREAD == OV_THREAD_OMP +// Please note that this function does not guarantee execution on the same number of threads from call to call. +// Use the parallel_nt* functions if the procedure depends on a certain number of threads. # pragma omp parallel - for_5d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, D2, D3, D4, func); + { for_5d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, D2, D3, D4, func); } #elif OV_THREAD == OV_THREAD_SEQ for_5d(0, 1, D0, D1, D2, D3, D4, func); #endif @@ -763,8 +773,10 @@ void parallel_for6d(const T0& D0, const T1& D1, const T2& D2, const T3& D3, cons for_6d(ithr, nthr, D0, D1, D2, D3, D4, D5, func); }); #elif OV_THREAD == OV_THREAD_OMP +// Please note that this function does not guarantee execution on the same number of threads from call to call. +// Use the parallel_nt* functions if the procedure depends on a certain number of threads. # pragma omp parallel - for_6d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, D2, D3, D4, D5, func); + { for_6d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, D2, D3, D4, D5, func); } #elif OV_THREAD == OV_THREAD_SEQ for_6d(0, 1, D0, D1, D2, D3, D4, D5, func); #endif diff --git a/src/core/include/openvino/op/search_sorted.hpp b/src/core/include/openvino/op/search_sorted.hpp index c370ba46b2f182..efb1f8491e0882 100644 --- a/src/core/include/openvino/op/search_sorted.hpp +++ b/src/core/include/openvino/op/search_sorted.hpp @@ -22,7 +22,15 @@ class OPENVINO_API SearchSorted : public Op { /// \param values Values to search indexs for. /// \param right_mode If False, return the first suitable index that is found for given value. If True, return /// the last such index. - SearchSorted(const Output& sorted_sequence, const Output& values, bool right_mode = false); + /// \param output_type The element type of the output tensor. This is purely an implementation flag, which + /// is used to convert the output type for CPU plugin in ConvertPrecision transformation (and potentially other + /// plugins as well). Setting this flag to element::i32 will result in the output tensor of i32 element type. + /// Setting this flag to element::i64 will generally not give any effect, since it will be converted to i32 anyway, + /// at least for CPU plugin. 
+ SearchSorted(const Output& sorted_sequence, + const Output& values, + bool right_mode = false, + const element::Type& output_type = element::i64); void validate_and_infer_types() override; bool visit_attributes(AttributeVisitor& visitor) override; @@ -36,8 +44,17 @@ class OPENVINO_API SearchSorted : public Op { m_right_mode = right_mode; } + void set_output_type_attr(const element::Type& output_type) { + m_output_type = output_type; + } + + element::Type get_output_type_attr() const { + return m_output_type; + } + private: bool m_right_mode{}; + element::Type m_output_type = element::i64; }; } // namespace v15 } // namespace op diff --git a/src/core/include/openvino/op/squeeze.hpp b/src/core/include/openvino/op/squeeze.hpp index 8c27f29d66df66..dde456aa2eef47 100644 --- a/src/core/include/openvino/op/squeeze.hpp +++ b/src/core/include/openvino/op/squeeze.hpp @@ -4,7 +4,7 @@ #pragma once -#include "openvino/op/op.hpp" +#include "openvino/op/util/squeeze_base.hpp" namespace ov { namespace op { @@ -12,30 +12,65 @@ namespace v0 { /// \brief Squeeze operation. /// /// \ingroup ov_ops_cpp_api -class OPENVINO_API Squeeze : public Op { +class OPENVINO_API Squeeze : public util::SqueezeBase { public: OPENVINO_OP("Squeeze", "opset1"); Squeeze(); - Squeeze(const Output& data, const Output& axes); + /// \brief Constructs a squeeze v0 operation. + /// + /// \param data Input tensor with data Squeeze(const Output& data); + /// \brief Constructs a squeeze v0 operation. + /// + /// \param data Input tensor with data + /// \param axis The axis along which to squeeze the input tensor. + Squeeze(const Output& data, const Output& axes); void validate_and_infer_types() override; bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; - bool has_evaluate() const override; - bool evaluate_lower(TensorVector& outputs) const override; - bool evaluate_upper(TensorVector& outputs) const override; - bool evaluate_symbol(TensorSymbolVector& output_symbols) const override; - bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override; - bool can_constant_fold(const OutputVector& inputs_values) const override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - bool is_dynamic() const override; - private: Output get_default_axes_input() const; }; } // namespace v0 + +namespace v15 { +/// \brief Squeeze operation. +/// +/// \ingroup ov_ops_cpp_api +class OPENVINO_API Squeeze : public util::SqueezeBase { +public: + OPENVINO_OP("Squeeze", "opset15"); + + Squeeze(); + /// \brief Constructs a squeeze v15 operation. + /// + /// \param data Input tensor with data + /// \param allow_axis_skip Shape inference result dynamic rank if selected axis has 1 in range of its dynamic + Squeeze(const Output& data, const bool allow_axis_skip = false); + /// \brief Constructs a squeeze v15 operation. + /// + /// \param data Input tensor with data + /// \param axis The axis along which to squeeze the input tensor. 
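To make the documented intent of the new output_type argument easier to see, here is a minimal sketch of its user-visible effect: the index element type reported by the node itself. It uses only the constructor declared in this header plus opset Parameter/Constant nodes; the ConvertPrecision machinery that flips this attribute to i32 for the CPU plugin is exercised by the tests earlier in this patch and is not repeated here.

```cpp
// Sketch only: effect of the new output_type argument of SearchSorted-15.
#include <memory>

#include "openvino/op/constant.hpp"
#include "openvino/op/parameter.hpp"
#include "openvino/op/search_sorted.hpp"

int main() {
    using namespace ov;

    const auto sorted = op::v0::Constant::create(element::i64, Shape{5}, {1, 2, 3, 4, 5});
    const auto values = std::make_shared<op::v0::Parameter>(element::i64, Shape{3});

    // Default: indices are reported as i64, exactly as before this change.
    const auto idx_default = std::make_shared<op::v15::SearchSorted>(sorted, values);

    // With output_type set, the node itself advertises i32 indices; this is the
    // attribute ConvertPrecision toggles when lowering i64 -> i32 for a plugin.
    const auto idx_i32 =
        std::make_shared<op::v15::SearchSorted>(sorted, values, /*right_mode=*/false, element::i32);

    const bool ok = idx_default->get_output_element_type(0) == element::i64 &&
                    idx_i32->get_output_element_type(0) == element::i32;
    return ok ? 0 : 1;
}
```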
+ /// \param allow_axis_skip Shape inference result dynamic rank if selected axis has 1 in range of its dynamic + Squeeze(const Output& data, const Output& axes, const bool allow_axis_skip = false); + + void validate_and_infer_types() override; + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; + + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; + + bool visit_attributes(AttributeVisitor& visitor) override; + + bool get_allow_axis_skip() const; + +private: + Output get_default_axes_input() const; + bool m_allow_axis_skip{}; +}; +} // namespace v15 } // namespace op } // namespace ov diff --git a/src/core/include/openvino/op/util/squeeze_base.hpp b/src/core/include/openvino/op/util/squeeze_base.hpp new file mode 100644 index 00000000000000..50d960824e10d2 --- /dev/null +++ b/src/core/include/openvino/op/util/squeeze_base.hpp @@ -0,0 +1,39 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/op/op.hpp" + +namespace ov { +namespace op { +namespace util { +/// \brief Squeeze operation. +/// +/// \ingroup ov_ops_cpp_api +class OPENVINO_API SqueezeBase : public Op { +public: + OPENVINO_OP("Squeeze", "util"); + SqueezeBase() = default; + /// \brief Constructs a squeeze operation. + /// + /// \param data Input tensor with data + SqueezeBase(const Output& data); + /// \brief Constructs a squeeze operation. + /// + /// \param data Input tensor with data + /// \param axis The axis along which to squeeze the input tensor. + SqueezeBase(const Output& data, const Output& axes); + + bool has_evaluate() const override; + bool evaluate_lower(TensorVector& outputs) const override; + bool evaluate_upper(TensorVector& outputs) const override; + bool evaluate_symbol(TensorSymbolVector& output_symbols) const override; + bool can_constant_fold(const OutputVector& inputs_values) const override; + bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override; + bool is_dynamic() const override; +}; +} // namespace util +} // namespace op +} // namespace ov diff --git a/src/core/include/openvino/opsets/opset15_tbl.hpp b/src/core/include/openvino/opsets/opset15_tbl.hpp index a18093c4ef3f5c..8d12420719bb6b 100644 --- a/src/core/include/openvino/opsets/opset15_tbl.hpp +++ b/src/core/include/openvino/opsets/opset15_tbl.hpp @@ -7,11 +7,220 @@ # define _OPENVINO_OP_REG(x, y) #endif -// Previous opsets operators -_OPENVINO_OP_REG(Parameter, ov::op::v0) +_OPENVINO_OP_REG(Abs, ov::op::v0) +_OPENVINO_OP_REG(Acos, ov::op::v0) +_OPENVINO_OP_REG(Add, ov::op::v1) +_OPENVINO_OP_REG(Asin, ov::op::v0) +_OPENVINO_OP_REG(Atan, ov::op::v0) +_OPENVINO_OP_REG(AvgPool, ov::op::v14) +_OPENVINO_OP_REG(BatchNormInference, ov::op::v5) +_OPENVINO_OP_REG(BinaryConvolution, ov::op::v1) +_OPENVINO_OP_REG(Broadcast, ov::op::v3) +_OPENVINO_OP_REG(Bucketize, ov::op::v3) +_OPENVINO_OP_REG(CTCGreedyDecoder, ov::op::v0) +_OPENVINO_OP_REG(Ceiling, ov::op::v0) +_OPENVINO_OP_REG(Clamp, ov::op::v0) +_OPENVINO_OP_REG(Concat, ov::op::v0) +_OPENVINO_OP_REG(Constant, ov::op::v0) _OPENVINO_OP_REG(Convert, ov::op::v0) +_OPENVINO_OP_REG(ConvertLike, ov::op::v1) +_OPENVINO_OP_REG(Convolution, ov::op::v1) +_OPENVINO_OP_REG(ConvolutionBackpropData, ov::op::v1) +_OPENVINO_OP_REG(Cos, ov::op::v0) +_OPENVINO_OP_REG(Cosh, ov::op::v0) +_OPENVINO_OP_REG(CumSum, ov::op::v0) +_OPENVINO_OP_REG(DeformablePSROIPooling, ov::op::v1) +_OPENVINO_OP_REG(DepthToSpace, ov::op::v0) +_OPENVINO_OP_REG(Divide, 
ov::op::v1) +_OPENVINO_OP_REG(Elu, ov::op::v0) +_OPENVINO_OP_REG(Erf, ov::op::v0) +_OPENVINO_OP_REG(Equal, ov::op::v1) +_OPENVINO_OP_REG(Exp, ov::op::v0) +_OPENVINO_OP_REG(ExtractImagePatches, ov::op::v3) +_OPENVINO_OP_REG(FakeQuantize, ov::op::v0) +_OPENVINO_OP_REG(Floor, ov::op::v0) +_OPENVINO_OP_REG(FloorMod, ov::op::v1) +_OPENVINO_OP_REG(GatherTree, ov::op::v1) +_OPENVINO_OP_REG(Greater, ov::op::v1) +_OPENVINO_OP_REG(GreaterEqual, ov::op::v1) +_OPENVINO_OP_REG(GridSample, ov::op::v9) +_OPENVINO_OP_REG(GroupConvolution, ov::op::v1) +_OPENVINO_OP_REG(GroupConvolutionBackpropData, ov::op::v1) +_OPENVINO_OP_REG(GRN, ov::op::v0) +_OPENVINO_OP_REG(HardSigmoid, ov::op::v0) +_OPENVINO_OP_REG(Less, ov::op::v1) +_OPENVINO_OP_REG(LessEqual, ov::op::v1) +_OPENVINO_OP_REG(Log, ov::op::v0) +_OPENVINO_OP_REG(LogicalAnd, ov::op::v1) +_OPENVINO_OP_REG(LogicalNot, ov::op::v1) +_OPENVINO_OP_REG(LogicalOr, ov::op::v1) +_OPENVINO_OP_REG(LogicalXor, ov::op::v1) +_OPENVINO_OP_REG(LRN, ov::op::v0) +_OPENVINO_OP_REG(LSTMCell, ov::op::v4) +_OPENVINO_OP_REG(MatMul, ov::op::v0) +_OPENVINO_OP_REG(Maximum, ov::op::v1) +_OPENVINO_OP_REG(Minimum, ov::op::v1) +_OPENVINO_OP_REG(Mod, ov::op::v1) +_OPENVINO_OP_REG(Multiply, ov::op::v1) +_OPENVINO_OP_REG(Negative, ov::op::v0) +_OPENVINO_OP_REG(NormalizeL2, ov::op::v0) +_OPENVINO_OP_REG(NotEqual, ov::op::v1) +_OPENVINO_OP_REG(OneHot, ov::op::v1) +_OPENVINO_OP_REG(PRelu, ov::op::v0) +_OPENVINO_OP_REG(PSROIPooling, ov::op::v0) +_OPENVINO_OP_REG(Parameter, ov::op::v0) +_OPENVINO_OP_REG(Power, ov::op::v1) +_OPENVINO_OP_REG(PriorBoxClustered, ov::op::v0) +_OPENVINO_OP_REG(Proposal, ov::op::v4) +_OPENVINO_OP_REG(Range, ov::op::v4) +_OPENVINO_OP_REG(Relu, ov::op::v0) +_OPENVINO_OP_REG(ReduceMax, ov::op::v1) +_OPENVINO_OP_REG(ReduceLogicalAnd, ov::op::v1) +_OPENVINO_OP_REG(ReduceLogicalOr, ov::op::v1) +_OPENVINO_OP_REG(ReduceMean, ov::op::v1) +_OPENVINO_OP_REG(ReduceMin, ov::op::v1) +_OPENVINO_OP_REG(ReduceProd, ov::op::v1) +_OPENVINO_OP_REG(ReduceSum, ov::op::v1) +_OPENVINO_OP_REG(RegionYolo, ov::op::v0) +_OPENVINO_OP_REG(ReorgYolo, ov::op::v0) +_OPENVINO_OP_REG(Reshape, ov::op::v1) +_OPENVINO_OP_REG(Result, ov::op::v0) +_OPENVINO_OP_REG(ReverseSequence, ov::op::v0) +_OPENVINO_OP_REG(ROIPooling, ov::op::v0) +_OPENVINO_OP_REG(Select, ov::op::v1) +_OPENVINO_OP_REG(Selu, ov::op::v0) +_OPENVINO_OP_REG(Sign, ov::op::v0) +_OPENVINO_OP_REG(Sigmoid, ov::op::v0) +_OPENVINO_OP_REG(Sin, ov::op::v0) +_OPENVINO_OP_REG(Sinh, ov::op::v0) +_OPENVINO_OP_REG(Sqrt, ov::op::v0) +_OPENVINO_OP_REG(SpaceToDepth, ov::op::v0) +_OPENVINO_OP_REG(Split, ov::op::v1) +_OPENVINO_OP_REG(SquaredDifference, ov::op::v0) +_OPENVINO_OP_REG(Squeeze, ov::op::v15) +_OPENVINO_OP_REG(StridedSlice, ov::op::v1) +_OPENVINO_OP_REG(Subtract, ov::op::v1) +_OPENVINO_OP_REG(Tan, ov::op::v0) +_OPENVINO_OP_REG(Tanh, ov::op::v0) +_OPENVINO_OP_REG(TensorIterator, ov::op::v0) +_OPENVINO_OP_REG(Tile, ov::op::v0) +_OPENVINO_OP_REG(Transpose, ov::op::v1) +_OPENVINO_OP_REG(Unsqueeze, ov::op::v0) +_OPENVINO_OP_REG(VariadicSplit, ov::op::v1) + +// New operations added in opset2 +_OPENVINO_OP_REG(BatchToSpace, ov::op::v1) +_OPENVINO_OP_REG(SpaceToBatch, ov::op::v1) + +// New operations added in opset3 +_OPENVINO_OP_REG(EmbeddingBagPackedSum, ov::op::v3) +_OPENVINO_OP_REG(EmbeddingSegmentsSum, ov::op::v3) +_OPENVINO_OP_REG(EmbeddingBagOffsetsSum, ov::op::v3) +_OPENVINO_OP_REG(GRUCell, ov::op::v3) +_OPENVINO_OP_REG(NonZero, ov::op::v3) +_OPENVINO_OP_REG(RNNCell, ov::op::v0) +_OPENVINO_OP_REG(ScatterUpdate, ov::op::v3) 
+_OPENVINO_OP_REG(ShuffleChannels, ov::op::v0) _OPENVINO_OP_REG(ShapeOf, ov::op::v3) +// New operations added in opset4 +_OPENVINO_OP_REG(Acosh, ov::op::v3) +_OPENVINO_OP_REG(Asinh, ov::op::v3) +_OPENVINO_OP_REG(Atanh, ov::op::v3) +_OPENVINO_OP_REG(CTCLoss, ov::op::v4) +_OPENVINO_OP_REG(HSwish, ov::op::v4) +_OPENVINO_OP_REG(Mish, ov::op::v4) +_OPENVINO_OP_REG(ReduceL1, ov::op::v4) +_OPENVINO_OP_REG(ReduceL2, ov::op::v4) +_OPENVINO_OP_REG(SoftPlus, ov::op::v4) +_OPENVINO_OP_REG(Swish, ov::op::v4) + +// New operations added in opset5 +_OPENVINO_OP_REG(GRUSequence, ov::op::v5) +_OPENVINO_OP_REG(HSigmoid, ov::op::v5) +_OPENVINO_OP_REG(LogSoftmax, ov::op::v5) +_OPENVINO_OP_REG(Loop, ov::op::v5) +_OPENVINO_OP_REG(LSTMSequence, ov::op::v5) +_OPENVINO_OP_REG(RNNSequence, ov::op::v5) +_OPENVINO_OP_REG(Round, ov::op::v5) + +// New operations added in opset6 +_OPENVINO_OP_REG(CTCGreedyDecoderSeqLen, ov::op::v6) +_OPENVINO_OP_REG(ExperimentalDetectronDetectionOutput, ov::op::v6) +_OPENVINO_OP_REG(ExperimentalDetectronGenerateProposalsSingleImage, ov::op::v6) +_OPENVINO_OP_REG(ExperimentalDetectronPriorGridGenerator, ov::op::v6) +_OPENVINO_OP_REG(ExperimentalDetectronROIFeatureExtractor, ov::op::v6) +_OPENVINO_OP_REG(ExperimentalDetectronTopKROIs, ov::op::v6) +_OPENVINO_OP_REG(GatherElements, ov::op::v6) +_OPENVINO_OP_REG(MVN, ov::op::v6) +_OPENVINO_OP_REG(Assign, ov::op::v6) // new version +_OPENVINO_OP_REG(ReadValue, ov::op::v6) // new version + +// New operations added in opset7 +_OPENVINO_OP_REG(DFT, ov::op::v7) +_OPENVINO_OP_REG(Einsum, ov::op::v7) +_OPENVINO_OP_REG(Gelu, ov::op::v7) +_OPENVINO_OP_REG(IDFT, ov::op::v7) +_OPENVINO_OP_REG(Roll, ov::op::v7) + +// New operations added in opset8 +_OPENVINO_OP_REG(Gather, ov::op::v8) +_OPENVINO_OP_REG(GatherND, ov::op::v8) +_OPENVINO_OP_REG(AdaptiveAvgPool, ov::op::v8) +_OPENVINO_OP_REG(AdaptiveMaxPool, ov::op::v8) +_OPENVINO_OP_REG(DeformableConvolution, ov::op::v8) +_OPENVINO_OP_REG(DetectionOutput, ov::op::v8) +_OPENVINO_OP_REG(I420toBGR, ov::op::v8) +_OPENVINO_OP_REG(I420toRGB, ov::op::v8) +_OPENVINO_OP_REG(MatrixNms, ov::op::v8) +_OPENVINO_OP_REG(MaxPool, ov::op::v14) +_OPENVINO_OP_REG(NV12toBGR, ov::op::v8) +_OPENVINO_OP_REG(NV12toRGB, ov::op::v8) +_OPENVINO_OP_REG(RandomUniform, ov::op::v8) +_OPENVINO_OP_REG(Slice, ov::op::v8) +_OPENVINO_OP_REG(Softmax, ov::op::v8) +_OPENVINO_OP_REG(If, ov::op::v8) +_OPENVINO_OP_REG(PriorBox, ov::op::v8) + +// New operations added in opset9 +_OPENVINO_OP_REG(IRDFT, ov::op::v9) +_OPENVINO_OP_REG(RDFT, ov::op::v9) +_OPENVINO_OP_REG(Eye, ov::op::v9) +_OPENVINO_OP_REG(NonMaxSuppression, ov::op::v9) +_OPENVINO_OP_REG(ROIAlign, ov::op::v9) +_OPENVINO_OP_REG(SoftSign, ov::op::v9) +_OPENVINO_OP_REG(GenerateProposals, ov::op::v9) +_OPENVINO_OP_REG(MulticlassNms, ov::op::v9) + +// New operations added in opset10 +_OPENVINO_OP_REG(IsFinite, ov::op::v10) +_OPENVINO_OP_REG(IsInf, ov::op::v10) +_OPENVINO_OP_REG(IsNaN, ov::op::v10) +_OPENVINO_OP_REG(Unique, ov::op::v10) + +// New operations added in opset11 +_OPENVINO_OP_REG(Interpolate, ov::op::v11) +_OPENVINO_OP_REG(TopK, ov::op::v11) + +// New operations added in opset12 +_OPENVINO_OP_REG(GroupNormalization, ov::op::v12) +_OPENVINO_OP_REG(Pad, ov::op::v12) +_OPENVINO_OP_REG(ScatterElementsUpdate, ov::op::v12) + +// New operations added in opset13 +_OPENVINO_OP_REG(BitwiseAnd, ov::op::v13) +_OPENVINO_OP_REG(BitwiseNot, ov::op::v13) +_OPENVINO_OP_REG(BitwiseOr, ov::op::v13) +_OPENVINO_OP_REG(BitwiseXor, ov::op::v13) +_OPENVINO_OP_REG(NMSRotated, ov::op::v13) 
+_OPENVINO_OP_REG(Multinomial, ov::op::v13) +_OPENVINO_OP_REG(ScaledDotProductAttention, ov::op::v13) +_OPENVINO_OP_REG(FakeConvert, ov::op::v13) + +// New operations added in opset14 +_OPENVINO_OP_REG(ConvertPromoteTypes, ov::op::v14) +_OPENVINO_OP_REG(Inverse, ov::op::v14) + // New operations added in opset15 _OPENVINO_OP_REG(ROIAlignRotated, ov::op::v15) _OPENVINO_OP_REG(ScatterNDUpdate, ov::op::v15) diff --git a/src/core/reference/include/openvino/reference/search_sorted.hpp b/src/core/reference/include/openvino/reference/search_sorted.hpp index ca5361c388c621..7ea8ec1078a2a1 100644 --- a/src/core/reference/include/openvino/reference/search_sorted.hpp +++ b/src/core/reference/include/openvino/reference/search_sorted.hpp @@ -4,6 +4,7 @@ #pragma once +#include "openvino/core/parallel.hpp" #include "openvino/core/shape.hpp" #include "openvino/reference/utils/coordinate_index.hpp" #include "openvino/reference/utils/coordinate_transform.hpp" @@ -30,7 +31,13 @@ void search_sorted(const T* sorted, }; } - for (const Coordinate& values_coord : values_transform) { + const size_t size = shape_size(values_shape); + + auto func = [&](size_t i) { + auto it = values_transform.begin(); + it += i; + const Coordinate& values_coord = *it; + const auto values_index = coordinate_index(values_coord, values_shape); const T value = values[values_index]; @@ -48,7 +55,9 @@ void search_sorted(const T* sorted, const ptrdiff_t sorted_index = (idx_ptr - sorted) - sorted_index_begin; out[values_index] = static_cast(sorted_index); - } + }; + + ov::parallel_for(size, func); } } // namespace reference diff --git a/src/core/reference/src/op/multiclass_nms.cpp b/src/core/reference/src/op/multiclass_nms.cpp index 3f0b6c8634ea9d..88d8d405c618af 100644 --- a/src/core/reference/src/op/multiclass_nms.cpp +++ b/src/core/reference/src/op/multiclass_nms.cpp @@ -48,7 +48,6 @@ static float intersectionOverUnion(const Rectangle& boxI, const Rectangle& boxJ, // start: start index along axis "M" template std::vector slice_image(const T* data, const Shape& data_shape, const int64_t start, const int64_t item_num) { - std::vector slice_data; const auto class_num = data_shape[0]; const auto item_size = (data_shape.size() == 3) ? data_shape[2] : 1; @@ -57,7 +56,7 @@ std::vector slice_image(const T* data, const Shape& data_shape, const int64_t "Invaid inputs as it is trying to slice data out of range."); const auto row_num = item_num * item_size; - slice_data.reserve(class_num * row_num); + std::vector slice_data(static_cast(class_num * row_num)); T* item_data = slice_data.data(); T* src = const_cast(data + start * item_size); for (size_t i = 0; i < class_num; i++) { diff --git a/src/core/reference/src/op/stft.cpp b/src/core/reference/src/op/stft.cpp index d3a93db0e1e937..6fd5583be21a75 100644 --- a/src/core/reference/src/op/stft.cpp +++ b/src/core/reference/src/op/stft.cpp @@ -21,8 +21,9 @@ void stft(const float* signal, const int64_t frame_size, const int64_t frame_step, const bool transpose_frames) { - constexpr size_t signal_axis = 1; - const auto batch_size = signal_shape[0]; + const auto is_signal_1D = signal_shape.size() == 1; + const size_t batch_size = is_signal_1D ? 1 : signal_shape[0]; + const size_t signal_axis = is_signal_1D ? 
0 : 1; const auto signal_length = signal_shape[signal_axis]; const auto num_frames = static_cast((signal_length - frame_size) / frame_step) + 1; const auto frame_size_dim = static_cast(frame_size); diff --git a/src/core/shape_inference/include/squeeze_shape_inference.hpp b/src/core/shape_inference/include/squeeze_shape_inference.hpp index ee71b5452db1c3..31eeea5d36a9ea 100644 --- a/src/core/shape_inference/include/squeeze_shape_inference.hpp +++ b/src/core/shape_inference/include/squeeze_shape_inference.hpp @@ -11,6 +11,117 @@ namespace ov { namespace op { namespace v0 { +template > +std::vector shape_infer(const Squeeze* op, + const std::vector& input_shapes, + const ITensorAccessor& ta = make_tensor_accessor()) { + using DimType = typename T::value_type; + + const auto number_of_inputs = input_shapes.size(); + OPENVINO_ASSERT(!input_shapes.empty()); + + const auto& arg_shape = input_shapes[0]; + const auto& arg_rank = arg_shape.rank(); + auto output_shapes = std::vector(1); + auto& output_shape = output_shapes[0]; + + std::unique_ptr> unique_axes; + + if (number_of_inputs == 1) { + unique_axes.reset(new std::set()); + } else if (number_of_inputs == 2) { + const auto& axes_shape = input_shapes[1]; + NODE_VALIDATION_CHECK(op, + axes_shape.is_dynamic() || ov::util::is_rank_compatible_any_of(axes_shape.rank(), {0, 1}), + "Second input (axes) should not be of rank higher than 1. Got: ", + axes_shape.rank().get_length()); + + std::vector axes; + if (arg_rank.is_static() && axes_shape.is_static()) { + if (auto axes = get_input_const_data_as(op, 1, ta)) { + // The values of `axes` input are known + ov::util::try_normalize_axes(*axes, arg_rank, *op); + unique_axes.reset(new std::set(axes->cbegin(), axes->cend())); + } else if (arg_rank.get_length() > 0 && shape_size(axes_shape.to_shape()) == 1) { + // The `axes` input is a single element tensor which is unique by definition, deducing output rank + const auto has_squeezable_dim = + std::any_of(arg_shape.cbegin(), arg_shape.cend(), [](const DimType& dim) { + return dim.compatible(1); + }); + if (has_squeezable_dim) { + output_shape = PartialShape::dynamic(arg_rank.get_length() - 1); + } else { + output_shape = arg_shape; + } + return output_shapes; + } + } + } else { + // Invalid number of inputs, empty error message for backward compatibility. + NODE_VALIDATION_CHECK(op, false); + } + + if (arg_rank.is_static() && (unique_axes != nullptr)) { + output_shape.resize(0); + if (unique_axes->empty()) { + // if only first input provided or axes are empty remove all dimensions equal to 1. + if (std::any_of(arg_shape.cbegin(), arg_shape.cend(), [](const DimType& d) { + return d.is_dynamic() && d.compatible(1); + })) { + // we are unsure if dynamic dimensions would be equal to 1 or not, so we set dynamic output rank + output_shape = PartialShape::dynamic(); + return output_shapes; + } else { + std::copy_if(arg_shape.cbegin(), + arg_shape.cend(), + std::back_inserter(output_shape), + [](const DimType& dim) { + return !dim.compatible(1); + }); + } + } else { + int64_t idx = 0; + auto rm_axis_iter = unique_axes->cbegin(); + auto rm_axis_end = unique_axes->cend(); + + // Returns true if dimension not squeezable on axis from input axes. 
+ const auto not_squeezable_at_axis = [&rm_axis_iter, &rm_axis_end, &idx](const DimType& dim) { + if ((rm_axis_iter != rm_axis_end) && (*rm_axis_iter == idx++)) { + ++rm_axis_iter; + // Ignore: Pointed by axis, but not squeezable + return !dim.compatible(1); + } else { + return true; + } + }; + + std::copy_if(arg_shape.cbegin(), + arg_shape.cend(), + std::back_inserter(output_shape), + not_squeezable_at_axis); + } + } else { + output_shape = PartialShape::dynamic(); + } + return output_shapes; +} +} // namespace v0 + +namespace v15 { +template +bool apply_allow_axis_skip(const ov::op::v15::Squeeze* const op, + const std::unique_ptr>& unique_axes, + const T& arg_shape) { + using DimType = typename T::value_type; + int64_t i{-1}; + + return op->get_allow_axis_skip() && + std::any_of(arg_shape.cbegin(), arg_shape.cend(), [&unique_axes, &i](const DimType& d) { + ++i; + // Squeeze result with dynamic rank if 1 is in range of selected dynamic dimension. + return d.is_dynamic() && d.compatible(1) && unique_axes->find(i) != unique_axes->end(); + }); +} /** * \brief Do Squeeze shape inference. @@ -59,7 +170,7 @@ std::vector shape_infer(const Squeeze* op, return dim.compatible(1); }); if (has_squeezable_dim) { - output_shape = PartialShape::dynamic(arg_rank.get_length() - 1); + output_shape = PartialShape::dynamic(); } else { output_shape = arg_shape; } @@ -71,7 +182,9 @@ std::vector shape_infer(const Squeeze* op, NODE_VALIDATION_CHECK(op, false); } - if (arg_rank.is_static() && (unique_axes != nullptr)) { + if (!arg_rank.is_static() || (unique_axes == nullptr) || apply_allow_axis_skip(op, unique_axes, arg_shape)) { + output_shape = PartialShape::dynamic(); + } else if (arg_rank.is_static() && (unique_axes != nullptr)) { output_shape.resize(0); if (unique_axes->empty()) { // if only first input provided or axes are empty remove all dimensions equal to 1. 
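Because the hunk above changes what the v15 shape inference reports compared with the v0 code kept alongside it, a compact sketch of the user-visible difference may help. It mirrors the type_prop tests added later in this patch and uses only the Squeeze constructors this patch introduces: when a single-element axes input has an unknown value (and, with allow_axis_skip, when a selected axis is a dynamic dimension that may be 1), v15 now reports a fully dynamic rank where v0 reported the rank reduced by one.

```cpp
// Sketch based on the type_prop tests added in this patch.
#include <memory>

#include "openvino/op/parameter.hpp"
#include "openvino/op/squeeze.hpp"

int main() {
    using namespace ov;

    const auto data = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{2, 1, 4});
    // Axes tensor of static shape {1}, but its value is not known at this point.
    const auto axes = std::make_shared<op::v0::Parameter>(element::u64, PartialShape{1});

    // v0: exactly one axis will be squeezed, so only the output rank is deduced.
    const auto squeeze_v0 = std::make_shared<op::v0::Squeeze>(data, axes);
    // v15: the same case is now reported with a fully dynamic rank.
    const auto squeeze_v15 = std::make_shared<op::v15::Squeeze>(data, axes);

    const bool ok = squeeze_v0->get_output_partial_shape(0) == PartialShape::dynamic(2) &&
                    squeeze_v15->get_output_partial_shape(0) == PartialShape::dynamic();
    return ok ? 0 : 1;
}
```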
@@ -115,6 +228,6 @@ std::vector shape_infer(const Squeeze* op, } return output_shapes; } -} // namespace v0 +} // namespace v15 } // namespace op } // namespace ov diff --git a/src/core/shape_inference/include/stft_shape_inference.hpp b/src/core/shape_inference/include/stft_shape_inference.hpp index 9605a8fa797d8d..41abf6640ddb96 100644 --- a/src/core/shape_inference/include/stft_shape_inference.hpp +++ b/src/core/shape_inference/include/stft_shape_inference.hpp @@ -16,6 +16,8 @@ std::vector shape_infer(const STFT* op, const std::vector& input_shapes, const ITensorAccessor& ta = make_tensor_accessor()) { using TDim = typename TRShape::value_type; + using TDimVal = typename TDim::value_type; + NODE_VALIDATION_CHECK(op, input_shapes.size() == 4); const auto& signal_shape = input_shapes[0]; @@ -23,10 +25,11 @@ std::vector shape_infer(const STFT* op, const auto& frame_size_shape = input_shapes[2]; const auto& frame_step_shape = input_shapes[3]; + const auto signal_shape_rank = signal_shape.rank(); NODE_SHAPE_INFER_CHECK(op, input_shapes, - signal_shape.rank().compatible(2), - "The shape of signal must be 2D [batch, signal_size]."); + signal_shape_rank.compatible(1) || signal_shape_rank.compatible(2), + "The shape of signal must be 1D [signal_size] or 2D [batch, signal_size]."); NODE_SHAPE_INFER_CHECK(op, input_shapes, window_shape.rank().compatible(1), @@ -40,25 +43,36 @@ std::vector shape_infer(const STFT* op, frame_step_shape.rank().compatible(0), "The shape of frame_step must be a scalar."); + if (signal_shape_rank.is_dynamic()) { + return {signal_shape}; + } + const auto frame_size = get_input_const_data_as(op, 2, ta); const auto frame_step = get_input_const_data_as(op, 3, ta); - if (signal_shape.rank().is_dynamic()) { - return {signal_shape}; - } else if (!frame_size || !frame_step) { - return {TRShape{signal_shape[0], -1, -1, 2}}; + const auto is_signal_1D = signal_shape.size() == 1; + if (!frame_size || !frame_step) { + if (is_signal_1D) { + return {TRShape{TDim(ov::util::dim::inf_bound), TDim(ov::util::dim::inf_bound), 2}}; + } else { + return {TRShape{signal_shape[0], TDim(ov::util::dim::inf_bound), TDim(ov::util::dim::inf_bound), 2}}; + } } const auto& frame_size_val = (*frame_size)[0]; const auto& frame_step_val = (*frame_step)[0]; + const TDim& signal_dim = is_signal_1D ? signal_shape[0] : signal_shape[1]; + const bool is_frame_size_in_range = + 0 < frame_size_val && (signal_dim.is_static() ? 
static_cast(frame_size_val) <= signal_dim.get_length() + : frame_size_val <= signal_dim.get_interval().get_max_val()); NODE_SHAPE_INFER_CHECK(op, input_shapes, - 0 < frame_size_val && frame_size_val < signal_shape[1].get_interval().get_max_val(), + is_frame_size_in_range, "Provided frame size is ", frame_size_val, " but must be in range [1, ", - signal_shape[1], + signal_dim, "]."); NODE_SHAPE_INFER_CHECK(op, @@ -68,17 +82,18 @@ std::vector shape_infer(const STFT* op, frame_step_val, " but must be greater than zero."); + const bool is_win_shape_correct = + window_shape.is_dynamic() || (TDimVal{0} < window_shape[0].get_length() && + window_shape[0].get_length() <= static_cast(frame_size_val)); NODE_SHAPE_INFER_CHECK(op, input_shapes, - window_shape.is_dynamic() || - (0 < window_shape[0].get_length() && window_shape[0].get_length() <= frame_size_val), + is_win_shape_correct, "Window input dimension must be in range [1, ", frame_size_val, "]."); - const auto& batch_dim = signal_shape[0]; - const TDim frame_size_dim = TDim{frame_size_val}; - const TDim signal_frame_size_diff = signal_shape[1] - frame_size_dim; + const TDim frame_size_dim = static_cast(frame_size_val); + const TDim signal_frame_size_diff = signal_dim - frame_size_dim; TDim fft_samples_dim = (frame_size_val / 2) + 1; // Divsion opeartor for static Dimension of PartialShape can return non static dimension and ceil instead of floor @@ -87,9 +102,13 @@ std::vector shape_infer(const STFT* op, std::vector output_shapes; if (op->get_transpose_frames()) { - output_shapes.emplace_back(TRShape{batch_dim, std::move(fft_samples_dim), std::move(frames_dim), 2}); + output_shapes.emplace_back(TRShape{std::move(fft_samples_dim), std::move(frames_dim), 2}); } else { - output_shapes.emplace_back(TRShape{batch_dim, std::move(frames_dim), std::move(fft_samples_dim), 2}); + output_shapes.emplace_back(TRShape{std::move(frames_dim), std::move(fft_samples_dim), 2}); + } + if (!is_signal_1D) { + const auto& batch_dim = signal_shape[0]; + output_shapes[0].insert(output_shapes[0].begin(), batch_dim); } return output_shapes; } diff --git a/src/core/src/op/search_sorted.cpp b/src/core/src/op/search_sorted.cpp index 8b9bb012b27106..65b5ff31861d8e 100644 --- a/src/core/src/op/search_sorted.cpp +++ b/src/core/src/op/search_sorted.cpp @@ -12,9 +12,13 @@ namespace ov { namespace op { namespace v15 { -SearchSorted::SearchSorted(const Output& sorted_sequence, const Output& values, bool right_mode) +SearchSorted::SearchSorted(const Output& sorted_sequence, + const Output& values, + bool right_mode, + const element::Type& output_type) : Op({sorted_sequence, values}), - m_right_mode(right_mode) { + m_right_mode(right_mode), + m_output_type(output_type) { constructor_validate_and_infer_types(); } @@ -23,20 +27,25 @@ void SearchSorted::validate_and_infer_types() { NODE_VALIDATION_CHECK(this, get_input_element_type(0).compatible(get_input_element_type(1)), "Sorted sequence and values must have the same element type."); + NODE_VALIDATION_CHECK(this, + m_output_type == element::i32 || m_output_type == element::i64, + "The element type of the last output can only be set to i32 or i64."); + const auto& output_shapes = shape_infer(this, ov::util::get_node_input_partial_shapes(*this)); - set_output_type(0, ov::element::i64, output_shapes[0]); + set_output_type(0, m_output_type, output_shapes[0]); } bool SearchSorted::visit_attributes(AttributeVisitor& visitor) { OV_OP_SCOPE(v15_SearchSorted_visit_attributes); visitor.on_attribute("right_mode", m_right_mode); + 
visitor.on_attribute("output_type", m_output_type); return true; } std::shared_ptr SearchSorted::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v15_SearchSorted_clone_with_new_inputs); check_new_args_count(this, new_args); - return std::make_shared(new_args.at(0), new_args.at(1), get_right_mode()); + return std::make_shared(new_args.at(0), new_args.at(1), get_right_mode(), get_output_type_attr()); } } // namespace v15 } // namespace op diff --git a/src/core/src/op/squeeze.cpp b/src/core/src/op/squeeze.cpp index 1b34a4e48a4faf..b79165ca4f5543 100644 --- a/src/core/src/op/squeeze.cpp +++ b/src/core/src/op/squeeze.cpp @@ -6,31 +6,19 @@ #include -#include "bound_evaluate.hpp" #include "itt.hpp" -#include "openvino/core/validation_util.hpp" -#include "openvino/op/constant.hpp" #include "squeeze_shape_inference.hpp" namespace ov { namespace op { namespace v0 { -namespace validate { -namespace { +Squeeze::Squeeze() : util::SqueezeBase() {} -bool axes_has_and_set_bound(const Node& op) { - return (op.get_input_size() < 2) || op.get_input_tensor(1).has_and_set_bound(); -} -} // namespace -} // namespace validate - -Squeeze::Squeeze() : Op() {} - -Squeeze::Squeeze(const Output& data, const Output& axes) : Op({data, axes}) { +Squeeze::Squeeze(const Output& data, const Output& axes) : util::SqueezeBase(data, axes) { constructor_validate_and_infer_types(); } -Squeeze::Squeeze(const Output& data) : Op({data}) { +Squeeze::Squeeze(const Output& data) : util::SqueezeBase(data) { constructor_validate_and_infer_types(); } @@ -69,62 +57,68 @@ bool Squeeze::evaluate(TensorVector& outputs, const TensorVector& inputs) const return true; } -bool Squeeze::has_evaluate() const { - OV_OP_SCOPE(v0_Squeeze_has_evaluate); - const auto validate_axes_type = [](const element::Type& et) -> bool { - switch (et) { - case element::i8: - case element::i16: - case element::i32: - case element::i64: - case element::u8: - case element::u16: - case element::u32: - case element::u64: - return true; - default: - return false; - } - }; - - return (get_input_size() < 2) || validate_axes_type(get_input_element_type(1)); -} +} // namespace v0 -bool Squeeze::evaluate_lower(TensorVector& output_values) const { - OV_OP_SCOPE(v0_Squeeze_evaluate_lower); - return validate::axes_has_and_set_bound(*this) && default_lower_bound_evaluator(this, output_values); +namespace v15 { +Squeeze::Squeeze() : util::SqueezeBase() {} + +Squeeze::Squeeze(const Output& data, const bool allow_axis_skip) + : util::SqueezeBase(data), + m_allow_axis_skip{allow_axis_skip} { + constructor_validate_and_infer_types(); } -bool Squeeze::evaluate_upper(TensorVector& output_values) const { - OV_OP_SCOPE(v0_Squeeze_evaluate_upper); - return validate::axes_has_and_set_bound(*this) && default_upper_bound_evaluator(this, output_values); +Squeeze::Squeeze(const Output& data, const Output& axes, const bool allow_axis_skip) + : util::SqueezeBase(data, axes), + m_allow_axis_skip{allow_axis_skip} { + constructor_validate_and_infer_types(); } -bool Squeeze::evaluate_symbol(TensorSymbolVector& output_symbols) const { - return validate::axes_has_and_set_bound(*this) && ov::util::default_symbol_evaluator(this, output_symbols); +std::shared_ptr Squeeze::clone_with_new_inputs(const OutputVector& new_args) const { + OV_OP_SCOPE(v15_Squeeze_clone_with_new_inputs); + check_new_args_count(this, new_args); + + switch (new_args.size()) { + case 1: + return std::make_shared(new_args[0], m_allow_axis_skip); + case 2: + return std::make_shared(new_args[0], new_args[1], 
m_allow_axis_skip); + default: + OPENVINO_THROW("Incorrect number of new arguments"); + } } -bool Squeeze::can_constant_fold(const OutputVector& inputs_values) const { - return get_output_partial_shape(0).is_static() && !is_const_fold_disabled(); +void Squeeze::validate_and_infer_types() { + OV_OP_SCOPE(v15_Squeeze_validate_and_infer_types); + + const auto input_shapes = ov::util::get_node_input_partial_shapes(*this); + const auto output_shapes = shape_infer(this, input_shapes); + + set_output_type(0, get_input_element_type(0), output_shapes[0]); } -bool Squeeze::constant_fold(OutputVector& output_values, const OutputVector& inputs_values) { - OV_OP_SCOPE(v0_Squeeze_constant_fold); - if (!can_constant_fold(inputs_values)) { - return false; - } +bool Squeeze::evaluate(TensorVector& outputs, const TensorVector& inputs) const { + OV_OP_SCOPE(v15_Squeeze_evaluate); + OPENVINO_ASSERT(outputs.size() == 1); - if (auto data_const = std::dynamic_pointer_cast(inputs_values[0].get_node_shared_ptr())) { - const auto& shape = get_output_shape(0); - output_values[0] = std::make_shared(*data_const, shape); - return true; - } - return false; + const auto output_shapes = + shape_infer(this, ov::util::get_tensors_partial_shapes(inputs), make_tensor_accessor(inputs)); + outputs[0].set_shape(output_shapes.front().get_shape()); + + std::memcpy(outputs[0].data(), inputs[0].data(), outputs[0].get_byte_size()); + return true; } -bool Squeeze::is_dynamic() const { - return get_output_partial_shape(0).is_dynamic(); +bool Squeeze::visit_attributes(AttributeVisitor& visitor) { + OV_OP_SCOPE(v15_Squeeze_visit_attributes); + visitor.on_attribute("allow_axis_skip", m_allow_axis_skip); + return true; } -} // namespace v0 + +bool Squeeze::get_allow_axis_skip() const { + OV_OP_SCOPE(v15_Squeeze_get_allow_axis_skip); + return m_allow_axis_skip; +} +} // namespace v15 } // namespace op } // namespace ov diff --git a/src/core/src/op/util/squeeze_base.cpp b/src/core/src/op/util/squeeze_base.cpp new file mode 100644 index 00000000000000..be5a20cbb58620 --- /dev/null +++ b/src/core/src/op/util/squeeze_base.cpp @@ -0,0 +1,91 @@ +#include "openvino/op/util/squeeze_base.hpp" + +#include "bound_evaluate.hpp" +#include "itt.hpp" +#include "openvino/core/validation_util.hpp" +#include "openvino/op/constant.hpp" + +namespace ov { +namespace op { + +namespace validate { +namespace { + +bool axes_has_and_set_bound(const Node& op) { + return (op.get_input_size() < 2) || op.get_input_tensor(1).has_and_set_bound(); +} +} // namespace +} // namespace validate + +namespace util { +SqueezeBase::SqueezeBase(const Output& data, const Output& axes) : Op({data, axes}) { + constructor_validate_and_infer_types(); +} + +SqueezeBase::SqueezeBase(const Output& data) : Op({data}) { + constructor_validate_and_infer_types(); +} + +bool SqueezeBase::has_evaluate() const { + OV_OP_SCOPE(util_SqueezeBase_has_evaluate); + const auto validate_axes_type = [](const element::Type& et) -> bool { + switch (et) { + case element::i8: + case element::i16: + case element::i32: + case element::i64: + case element::u8: + case element::u16: + case element::u32: + case element::u64: + return true; + default: + return false; + } + }; + + return (get_input_size() < 2) || validate_axes_type(get_input_element_type(1)); +} + +bool SqueezeBase::evaluate_lower(TensorVector& output_values) const { + OV_OP_SCOPE(util_SqueezeBase_evaluate_lower); + return validate::axes_has_and_set_bound(*this) && default_lower_bound_evaluator(this, output_values); +} + +bool 
SqueezeBase::evaluate_upper(TensorVector& output_values) const { + OV_OP_SCOPE(util_SqueezeBase_evaluate_upper); + return validate::axes_has_and_set_bound(*this) && default_upper_bound_evaluator(this, output_values); +} + +bool SqueezeBase::evaluate_symbol(TensorSymbolVector& output_symbols) const { + OV_OP_SCOPE(util_SqueezeBase_evaluate_symbol); + return validate::axes_has_and_set_bound(*this) && ov::util::default_symbol_evaluator(this, output_symbols); +} + +bool SqueezeBase::can_constant_fold(const OutputVector& inputs_values) const { + OV_OP_SCOPE(util_SqueezeBase_can_constant_fold); + return get_output_partial_shape(0).is_static() && !is_const_fold_disabled(); +} + +bool SqueezeBase::constant_fold(OutputVector& output_values, const OutputVector& inputs_values) { + OV_OP_SCOPE(util_SqueezeBase_constant_fold); + if (!can_constant_fold(inputs_values)) { + return false; + } + + if (auto data_const = std::dynamic_pointer_cast(inputs_values[0].get_node_shared_ptr())) { + const auto& shape = get_output_shape(0); + output_values[0] = std::make_shared(*data_const, shape); + return true; + } + return false; +} + +bool SqueezeBase::is_dynamic() const { + OV_OP_SCOPE(util_SqueezeBase_is_dynamic); + return get_output_partial_shape(0).is_dynamic(); +} + +} // namespace util +} // namespace op +} // namespace ov diff --git a/src/core/src/pass/serialize.cpp b/src/core/src/pass/serialize.cpp index 3af6d2c4b5313f..f179630b155d22 100644 --- a/src/core/src/pass/serialize.cpp +++ b/src/core/src/pass/serialize.cpp @@ -1173,7 +1173,7 @@ void ngfunction_2_ir(pugi::xml_node& netXml, pugi::xml_node rt_info_node = netXml.append_child("rt_info"); for (const auto& it : model.get_rt_info()) { // Skip IR version - if (it.first == "version") + if (it.first == "version" || it.first == "__weights_path") continue; serialize_rt_info(rt_info_node, it.first, it.second); } diff --git a/src/core/tests/CMakeLists.txt b/src/core/tests/CMakeLists.txt index c3ed58783ac946..89acd7bd1809d0 100644 --- a/src/core/tests/CMakeLists.txt +++ b/src/core/tests/CMakeLists.txt @@ -18,6 +18,7 @@ set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/threading.cpp if(SUGGEST_OVERRIDE_SUPPORTED) set_source_files_properties(ov_tensor_test.cpp type_prop/multiclass_nms.cpp + type_prop/squeeze.cpp PROPERTIES COMPILE_OPTIONS -Wno-suggest-override) endif() diff --git a/src/core/tests/opset.cpp b/src/core/tests/opset.cpp index 2df8bade6a2f2c..3a2a590de43edf 100644 --- a/src/core/tests/opset.cpp +++ b/src/core/tests/opset.cpp @@ -75,7 +75,7 @@ INSTANTIATE_TEST_SUITE_P(opset, OpsetTestParams{ov::get_opset12, 178}, OpsetTestParams{ov::get_opset13, 186}, OpsetTestParams{ov::get_opset14, 188}, - OpsetTestParams{ov::get_opset15, 15}), + OpsetTestParams{ov::get_opset15, 199}), OpsetTestNameGenerator{}); class MyOpOld : public ov::op::Op { diff --git a/src/core/tests/type_prop/squeeze.cpp b/src/core/tests/type_prop/squeeze.cpp index c7d81fd97c2786..7be05de1876d9f 100644 --- a/src/core/tests/type_prop/squeeze.cpp +++ b/src/core/tests/type_prop/squeeze.cpp @@ -7,193 +7,261 @@ #include "common_test_utils/test_assertions.hpp" #include "common_test_utils/type_prop.hpp" #include "openvino/op/broadcast.hpp" -#include "openvino/op/constant.hpp" -#include "openvino/op/gather.hpp" -#include "openvino/op/shape_of.hpp" -#include "openvino/op/unsqueeze.hpp" #include "sequence_generator.hpp" using namespace std; using namespace ov; using namespace testing; -TEST(type_prop, squeeze_axes_invalid_value) { +namespace { + +template +class SqueezelOperator : public 
TypePropOpTest {}; + +using SqueezeTypes = ::testing::Types; + +TYPED_TEST_SUITE(SqueezelOperator, SqueezeTypes); + +TYPED_TEST(SqueezelOperator, squeeze_axes_invalid_value) { auto param = make_shared(element::f32, Shape{1, 2, 3, 4}); auto axes_node = make_shared(element::u64, Shape{2}, vector{0, 2}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = this->make_op(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), (PartialShape{2, 3, 4})); } -TEST(type_prop, squeeze_single_input) { +TYPED_TEST(SqueezelOperator, squeeze_single_input) { auto param = make_shared(element::f32, PartialShape{1, -1, 3, 4}); - auto s = make_shared(param); - EXPECT_EQ(s->get_output_partial_shape(0), PartialShape::dynamic()); + const auto squeeze = this->make_op(param); + EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); } -TEST(type_prop, squeeze_axes_invalid_rank) { +TYPED_TEST(SqueezelOperator, squeeze_axes_invalid_rank) { auto param = make_shared(element::f32, Shape{1, 2, 3, 4}); auto axes_node = make_shared(element::i32, Shape{2, 1}, vector{0, 2}); - OV_EXPECT_THROW(auto s = make_shared(param, axes_node), + OV_EXPECT_THROW(const auto squeeze = this->make_op(param, axes_node), NodeValidationFailure, HasSubstr("Second input (axes) should not be of rank higher than 1.")); } -TEST(type_prop, squeeze_incorrect_negative_axes) { +TYPED_TEST(SqueezelOperator, squeeze_incorrect_negative_axes) { auto param = make_shared(element::f32, Shape{1, 4, 1, 4, 1, 8}); auto axes_node = make_shared(element::i64, Shape{2}, vector{-6, -10}); - OV_EXPECT_THROW(auto s = make_shared(param, axes_node), + OV_EXPECT_THROW(const auto squeeze = this->make_op(param, axes_node), ov::Exception, HasSubstr("Axis -10 out of the tensor rank range")); } -TEST(type_prop, squeeze_data_static_param_axes_1D_single_elem_static_shape_no_squeezable_dims) { +TYPED_TEST(SqueezelOperator, squeeze_data_static_param_axes_1D_single_elem_static_shape_no_squeezable_dims) { auto param = std::make_shared(ov::element::f32, PartialShape{2, 2, 4}); const auto axes_node = std::make_shared(element::u64, PartialShape{1}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = this->make_op(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), (PartialShape{2, 2, 4})); } -TEST(type_prop, squeeze_data_static_param_axes_1D_two_elem_static_shape_squeezable_dims_two) { +TYPED_TEST(SqueezelOperator, squeeze_data_static_param_axes_1D_two_elem_static_shape_squeezable_dims_two) { auto param = std::make_shared(ov::element::f32, PartialShape{1, 2, 1, 4}); const auto axes_node = std::make_shared(element::u64, PartialShape{2}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = this->make_op(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); } -TEST(type_prop, squeeze_data_static_param_axes_1D_two_elem_static_shape_squeezable_dims_one) { +TYPED_TEST(SqueezelOperator, squeeze_data_static_param_axes_1D_two_elem_static_shape_squeezable_dims_one) { auto param = std::make_shared(ov::element::f32, PartialShape{2, 1, 4}); const auto axes_node = std::make_shared(element::u64, PartialShape{2}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = this->make_op(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), 
element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); } -TEST(type_prop, squeeze_data_static_param_axes_1D_single_elem_static_shape_squeezable_dims_one) { +TEST(TypePropSqueezelOperatorV0, squeeze_data_static_param_axes_1D_single_elem_static_shape_squeezable_dims_one) { auto param = std::make_shared(ov::element::f32, PartialShape{2, 1, 4}); const auto axes_node = std::make_shared(element::u64, PartialShape{1}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = std::make_shared(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic(2)); } -TEST(type_prop, squeeze_data_static_param_axes_scalar_static_shape_squeezable_dims_one) { +TEST(TypePropSqueezelOperatorV15, squeeze_data_static_param_axes_1D_single_elem_static_shape_squeezable_dims_one) { + auto param = std::make_shared(ov::element::f32, PartialShape{2, 1, 4}); + const auto axes_node = std::make_shared(element::u64, PartialShape{1}); + const auto squeeze = std::make_shared(param, axes_node); + + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); +} + +TEST(TypePropSqueezelOperatorV0, squeeze_data_static_param_axes_scalar_static_shape_squeezable_dims_one) { auto param = std::make_shared(ov::element::f32, PartialShape{2, 1, 4}); const auto axes_node = std::make_shared(element::u64, PartialShape{}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = std::make_shared(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic(2)); } -TEST(type_prop, squeeze_data_scalar_param_axes_1D_single_elem_static_shape) { +TEST(TypePropSqueezelOperatorV15, squeeze_data_static_param_axes_scalar_static_shape_squeezable_dims_one) { + auto param = std::make_shared(ov::element::f32, PartialShape{2, 1, 4}); + const auto axes_node = std::make_shared(element::u64, PartialShape{}); + const auto squeeze = std::make_shared(param, axes_node); + + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); +} + +TYPED_TEST(SqueezelOperator, squeeze_data_scalar_param_axes_1D_single_elem_static_shape) { auto param = std::make_shared(ov::element::f32, PartialShape{}); const auto axes_node = std::make_shared(element::u64, PartialShape{1}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = this->make_op(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); } -TEST(type_prop, squeeze_data_dynamic_param_axes_1D_two_elem_static_shape_squeezable_dims_equal) { +TYPED_TEST(SqueezelOperator, squeeze_data_dynamic_param_axes_1D_two_elem_static_shape_squeezable_dims_equal) { auto param = std::make_shared(ov::element::f32, PartialShape{-1, {2, 8}, {1, 3}, {4, -1}}); const auto axes_node = std::make_shared(element::u64, PartialShape{2}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = this->make_op(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); } -TEST(type_prop, squeeze_data_static_param_axes_1D_two_elem_static_shape_squeezable_dims_more) { +TYPED_TEST(SqueezelOperator, 
squeeze_data_static_param_axes_1D_two_elem_static_shape_squeezable_dims_more) { auto param = std::make_shared(ov::element::f32, PartialShape{1, 2, 1, 3, 1}); const auto axes_node = std::make_shared(element::u64, PartialShape{2}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = this->make_op(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); } -TEST(type_prop, squeeze_data_static_param_axes_1D_single_elem_static_shape_squeezable_dims_more) { +TEST(TypePropSqueezelOperatorV0, squeeze_data_static_param_axes_1D_single_elem_static_shape_squeezable_dims_more) { auto param = std::make_shared(ov::element::f32, PartialShape{1, 2, 1, 3, 1}); const auto axes_node = std::make_shared(element::u64, PartialShape{1}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = std::make_shared(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic(4)); } -TEST(type_prop, squeeze_data_static_param_axes_scalar_static_shape_squeezable_dims_more) { +TEST(TypePropSqueezelOperatorV15, squeeze_data_static_param_axes_1D_single_elem_static_shape_squeezable_dims_more) { + auto param = std::make_shared(ov::element::f32, PartialShape{1, 2, 1, 3, 1}); + const auto axes_node = std::make_shared(element::u64, PartialShape{1}); + const auto squeeze = std::make_shared(param, axes_node); + + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); +} + +TEST(TypePropSqueezelOperatorV0, squeeze_data_static_param_axes_scalar_static_shape_squeezable_dims_more) { auto param = std::make_shared(ov::element::f32, PartialShape{1, 2, 1, 3, 1}); const auto axes_node = std::make_shared(element::u64, PartialShape{}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = std::make_shared(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic(4)); } -TEST(type_prop, squeeze_data_dynamic_param_axes_1D_two_elem_static_shape_squeezable_dims_more) { +TEST(TypePropSqueezelOperatorV15, squeeze_data_static_param_axes_scalar_static_shape_squeezable_dims_more) { + auto param = std::make_shared(ov::element::f32, PartialShape{1, 2, 1, 3, 1}); + const auto axes_node = std::make_shared(element::u64, PartialShape{}); + const auto squeeze = std::make_shared(param, axes_node); + + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); +} + +TYPED_TEST(SqueezelOperator, squeeze_data_dynamic_param_axes_1D_two_elem_static_shape_squeezable_dims_more) { auto param = std::make_shared(ov::element::f32, PartialShape{-1, {2, 8}, {1, 3}, {4, -1}}); const auto axes_node = std::make_shared(element::u64, PartialShape{2}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = this->make_op(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); } -TEST(type_prop, squeeze_data_dynamic_param_axes_1D_single_elem_static_shape_squeezable_dims_more) { +TEST(TypePropSqueezelOperatorV0, squeeze_data_dynamic_param_axes_1D_single_elem_static_shape_squeezable_dims_more) { auto param = std::make_shared(ov::element::f32, PartialShape{-1, {2, 8}, {1, 3}, {4, -1}}); 
const auto axes_node = std::make_shared(element::u64, PartialShape{1}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = std::make_shared(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic(3)); } -TEST(type_prop, squeeze_data_dynamic_param_axes_scalar_static_shape_squeezable_dims_more) { +TEST(TypePropSqueezelOperatorV15, squeeze_data_dynamic_param_axes_1D_single_elem_static_shape_squeezable_dims_more) { + auto param = std::make_shared(ov::element::f32, PartialShape{-1, {2, 8}, {1, 3}, {4, -1}}); + const auto axes_node = std::make_shared(element::u64, PartialShape{1}); + const auto squeeze = std::make_shared(param, axes_node); + + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); +} + +TEST(TypePropSqueezelOperatorV0, squeeze_data_dynamic_param_axes_scalar_static_shape_squeezable_dims_more) { auto param = std::make_shared(ov::element::f32, PartialShape{-1, {2, 8}, {1, 3}, {4, -1}}); const auto axes_node = std::make_shared(element::u64, PartialShape{}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = std::make_shared(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic(3)); } -TEST(type_prop, squeeze_data_dyamic_param_axes_1D_two_elem_static_shape_squeezable_dims_one) { +TEST(TypePropSqueezelOperatorV15, squeeze_data_dynamic_param_axes_scalar_static_shape_squeezable_dims_more) { + auto param = std::make_shared(ov::element::f32, PartialShape{-1, {2, 8}, {1, 3}, {4, -1}}); + const auto axes_node = std::make_shared(element::u64, PartialShape{}); + const auto squeeze = std::make_shared(param, axes_node); + + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); +} + +TYPED_TEST(SqueezelOperator, squeeze_data_dyamic_param_axes_1D_two_elem_static_shape_squeezable_dims_one) { auto param = std::make_shared(ov::element::f32, PartialShape{2, -1, 4}); const auto axes_node = std::make_shared(element::u64, PartialShape{2}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = this->make_op(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); } -TEST(type_prop, squeeze_data_dynamic_param_axes_1D_three_elem_static_shape_squeezable_dims_two) { +TYPED_TEST(SqueezelOperator, squeeze_data_dynamic_param_axes_1D_three_elem_static_shape_squeezable_dims_two) { auto param = std::make_shared(ov::element::f32, PartialShape{-1, {2, 8}, {1, 3}, {4, -1}}); const auto axes_node = std::make_shared(element::u64, PartialShape{3}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = this->make_op(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); } -TEST(type_prop, squeeze_data_dynamic_param_axes_1D_single_elem_static_shape_squeezable_dims_less) { +TEST(TypePropSqueezelOperatorV0, squeeze_data_dynamic_param_axes_1D_single_elem_static_shape_squeezable_dims_less) { auto param = std::make_shared(ov::element::f32, PartialShape{-1, {2, 8}, {1, 3}, {4, -1}}); const auto axes_node = std::make_shared(element::u64, PartialShape{1}); - const auto squeeze = std::make_shared(param, axes_node); 
+ const auto squeeze = std::make_shared(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic(3)); } +TEST(TypePropSqueezelOperatorV15, squeeze_data_dynamic_param_axes_1D_single_elem_static_shape_squeezable_dims_less) { + auto param = std::make_shared(ov::element::f32, PartialShape{-1, {2, 8}, {1, 3}, {4, -1}}); + const auto axes_node = std::make_shared(element::u64, PartialShape{1}); + const auto squeeze = std::make_shared(param, axes_node); + + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); +} + using SqueezeTypePropTestParam = std::tuple, // Squeeze axis PartialShape // Expected shape @@ -288,26 +356,44 @@ INSTANTIATE_TEST_SUITE_P(type_prop_shrink_shape_default_axes, TEST_P(SqueezeTest, partial_shape_dimension_propagation_const_axis_i32) { const auto axes_node = std::make_shared(element::i32, Shape{axes.size()}, axes); - const auto squeeze = std::make_shared(param, axes_node); - - EXPECT_EQ(squeeze->get_element_type(), element::f32); - EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + } + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + } } TEST_P(SqueezeTest, partial_shape_dimension_propagation_parameter_axes_no_data) { const auto axes_node = std::make_shared(element::u64, PartialShape{Shape{axes.size()}}); - const auto squeeze = std::make_shared(param, axes_node); - - EXPECT_EQ(squeeze->get_element_type(), element::f32); - EXPECT_TRUE(squeeze->get_output_partial_shape(0).compatible(exp_shape)); + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_TRUE(squeeze->get_output_partial_shape(0).compatible(exp_shape)); + } + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_TRUE(squeeze->get_output_partial_shape(0).compatible(exp_shape)); + } } TEST_P(SqueezeTest, partial_shape_dimension_propagation_dynamic_axes) { const auto axes_node = std::make_shared(element::u64, PartialShape::dynamic()); - const auto squeeze = std::make_shared(param, axes_node); - - EXPECT_EQ(squeeze->get_element_type(), element::f32); - EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); + } + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); + } } TEST_P(SqueezeTest, symbols_propagation) { @@ -321,9 +407,14 @@ TEST_P(SqueezeTest, symbols_propagation) { param = make_shared(element::f32, p_shape); const auto axes_node = std::make_shared(element::i32, Shape{axes.size()}, axes); - const auto squeeze = std::make_shared(param, axes_node); - - EXPECT_EQ(get_shape_symbols(squeeze->get_output_partial_shape(0)), exp_symbols); + { + const auto squeeze = std::make_shared(param, axes_node); + 
EXPECT_EQ(get_shape_symbols(squeeze->get_output_partial_shape(0)), exp_symbols); + } + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(get_shape_symbols(squeeze->get_output_partial_shape(0)), exp_symbols); + } } using SqueezeShapeTests = SqueezeTest; @@ -336,10 +427,16 @@ INSTANTIATE_TEST_SUITE_P(type_prop_shrink_shape_no_axes, TEST_P(SqueezeShapeTests, shape_dimension_propagation_const_axis_i64) { param = std::make_shared(element::f64, p_shape.to_shape()); const auto axes_node = std::make_shared(element::i64, Shape{axes.size()}, axes); - const auto squeeze = std::make_shared(param, axes_node); - - EXPECT_EQ(squeeze->get_element_type(), element::f64); - EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape.to_shape()); + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f64); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape.to_shape()); + } + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f64); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape.to_shape()); + } } using SqueezeNoAxesTest = SqueezeTest; @@ -350,10 +447,16 @@ INSTANTIATE_TEST_SUITE_P(type_prop_shrink_shape_no_axes, PrintToStringParamName()); TEST_P(SqueezeNoAxesTest, partial_shape_dimension_propagation_no_axes) { - const auto squeeze = std::make_shared(param); - - EXPECT_EQ(squeeze->get_element_type(), element::f32); - EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + { + const auto squeeze = std::make_shared(param); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + } + { + const auto squeeze = std::make_shared(param); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + } } using SqueezeScalarAxisTest = SqueezeTest; @@ -368,25 +471,35 @@ INSTANTIATE_TEST_SUITE_P( TEST_P(SqueezeScalarAxisTest, axis_value_as_vector) { const auto axes_node = std::make_shared(element::i32, Shape{}, axes); - const auto squeeze = std::make_shared(param, axes_node); - - EXPECT_EQ(squeeze->get_element_type(), element::f32); - EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + } + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + } } TEST_P(SqueezeScalarAxisTest, axis_value_as_integer) { const auto axes_node = std::make_shared(element::i32, Shape{}, axes.front()); - const auto squeeze = std::make_shared(param, axes_node); - - EXPECT_EQ(squeeze->get_element_type(), element::f32); - EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + } + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + } } using SqueezeBoundTest = UnSqueezeBoundTest; -INSTANTIATE_TEST_SUITE_P( - type_prop_bounds_propagate, - SqueezeBoundTest, +const auto test_values_in = Values(std::make_tuple(PartialShape::dynamic(6), 
PartialShape::dynamic(1)), std::make_tuple(PartialShape{Dimension(-1)}, PartialShape{Dimension(-1)}), std::make_tuple(PartialShape{Dimension::dynamic(), 8}, PartialShape{Dimension::dynamic()}), @@ -394,34 +507,136 @@ INSTANTIATE_TEST_SUITE_P( std::make_tuple(PartialShape{Dimension(20, -1), Dimension::dynamic()}, PartialShape{{20, -1}}), std::make_tuple(PartialShape{Dimension(-1, 5), Dimension::dynamic()}, PartialShape{Dimension(-1, 5)}), std::make_tuple(PartialShape{15}, PartialShape{15}), - std::make_tuple(PartialShape{2, 6}, PartialShape{2})), - PrintToStringParamName()); + std::make_tuple(PartialShape{2, 6}, PartialShape{2})); + +INSTANTIATE_TEST_SUITE_P(type_prop_bounds_propagate, SqueezeBoundTest, test_values_in, PrintToStringParamName()); /** * \brief Check symbol and dynamic value propagation. * * Test use evaluate symbol, lower/upper. */ -TEST_P(SqueezeBoundTest, propagate_symbol_and_dynamic_value) { +TEST_P(SqueezeBoundTest, propagate_symbol_and_dynamic_value_squeeze_v0) { PartialShape symboled_shape = PartialShape{p_shape}; in_symbols = set_shape_symbols(symboled_shape); - constexpr auto et = element::i64; - const auto symboled_param = std::make_shared(et, symboled_shape); - const auto symboled_shape_of = std::make_shared(symboled_param); + const auto squeeze = create_squeeze(symboled_shape); + const auto bc = std::make_shared(param, squeeze); + + EXPECT_EQ(bc->get_output_partial_shape(0), exp_shape); + const auto symbols = get_shape_symbols(bc->get_output_partial_shape(0)); + EXPECT_THAT(symbols, ElementsAre(in_symbols.front())); +} - const auto zero = std::vector{0}; - const auto axis = std::make_shared(et, Shape{}, zero); - const auto indices = std::make_shared(et, Shape{}, zero); - const auto gather = std::make_shared(symboled_shape_of, indices, axis); - const auto axis_1 = std::make_shared(et, Shape{2}, std::vector{0, 1}); - const auto unsqueeze = std::make_shared(gather, axis_1); - const auto squeeze = std::make_shared(unsqueeze, axis); +/** + * \brief Check symbol and dynamic value propagation. + * + * Test use evaluate symbol, lower/upper. 
+ */ +TEST_P(SqueezeBoundTest, propagate_symbol_and_dynamic_value_squeeze_v15) { + PartialShape symboled_shape = PartialShape{p_shape}; + + in_symbols = set_shape_symbols(symboled_shape); + const auto squeeze = create_squeeze(symboled_shape); const auto bc = std::make_shared(param, squeeze); EXPECT_EQ(bc->get_output_partial_shape(0), exp_shape); const auto symbols = get_shape_symbols(bc->get_output_partial_shape(0)); EXPECT_THAT(symbols, ElementsAre(in_symbols.front())); } + +using SqueezeAxesDynamicRankTestParam = decltype(std::tuple_cat(SqueezeTypePropTestParam{}, std::make_tuple(false))); +class SqueezeAxesDynamicRank : public ::testing::TestWithParam { +protected: + ov::PartialShape p_shape{}, exp_shape{}; + std::vector axes{}; + bool allow_axis_skip{}; +}; + +INSTANTIATE_TEST_SUITE_P( + SqueezeAxesDynamicRankTests, + SqueezeAxesDynamicRank, + ::testing::Values( + std::make_tuple(PartialShape{1, 2, -1, 4}, std::vector{}, PartialShape::dynamic(), false), + std::make_tuple(PartialShape{1, 2, -1, 4}, std::vector{}, PartialShape::dynamic(), true), + + std::make_tuple(PartialShape{1, 2, -1, 4}, std::vector{0}, PartialShape{2, -1, 4}, false), + std::make_tuple(PartialShape{1, 2, -1, 4}, std::vector{0}, PartialShape{2, -1, 4}, true), + + std::make_tuple(PartialShape{1, 2, -1, 4}, std::vector{2}, PartialShape{1, 2, 4}, false), + std::make_tuple(PartialShape{1, 2, -1, 4}, std::vector{2}, PartialShape::dynamic(), true), + + std::make_tuple(PartialShape{1, 2, -1, 4}, std::vector{0, 2}, PartialShape{2, 4}, false), + std::make_tuple(PartialShape{1, 2, -1, 4}, std::vector{0, 2}, PartialShape::dynamic(), true), + + std::make_tuple(PartialShape{1, 2, -1, 4}, std::vector{1}, PartialShape{1, 2, -1, 4}, false), + std::make_tuple(PartialShape{1, 2, -1, 4}, std::vector{1}, PartialShape{1, 2, -1, 4}, true), + + std::make_tuple(PartialShape{2, 4}, std::vector{1}, PartialShape{2, 4}, false), + std::make_tuple(PartialShape{2, 4}, std::vector{1}, PartialShape{2, 4}, true), + + std::make_tuple(PartialShape{2, {3, 5}}, std::vector{}, PartialShape{2, {3, 5}}, false), + std::make_tuple(PartialShape{2, {3, 5}}, std::vector{}, PartialShape{2, {3, 5}}, true), + + std::make_tuple(PartialShape{1, 2, -1}, std::vector{0, 1}, PartialShape{2, -1}, false), + std::make_tuple(PartialShape{1, 2, -1}, std::vector{0, 1}, PartialShape{2, -1}, true), + + std::make_tuple(PartialShape{1, 2, -1}, std::vector{1}, PartialShape{1, 2, -1}, false), + std::make_tuple(PartialShape{1, 2, -1}, std::vector{1}, PartialShape{1, 2, -1}, true), + + std::make_tuple(PartialShape{1, 1, -1}, std::vector{0, 1}, PartialShape{-1}, false), + std::make_tuple(PartialShape{1, 1, -1}, std::vector{0, 1}, PartialShape{-1}, true), + + std::make_tuple(PartialShape{1, 1, -1}, std::vector{1}, PartialShape{1, -1}, false), + std::make_tuple(PartialShape{1, 1, -1}, std::vector{1}, PartialShape{1, -1}, true), + + std::make_tuple(PartialShape{1, 2, 3}, std::vector{}, PartialShape{2, 3}, false), + std::make_tuple(PartialShape{1, 2, 3}, std::vector{}, PartialShape{2, 3}, true))); + +TEST_P(SqueezeAxesDynamicRank, squeeze_axes_dynamic_rank_param) { + const auto& params = GetParam(); + p_shape = std::get<0>(params); + axes = std::get<1>(params); + exp_shape = std::get<2>(params); + allow_axis_skip = std::get<3>(params); + + auto param = make_shared(element::f32, p_shape); + auto axes_node = make_shared(element::u64, Shape{axes.size()}, axes); + const auto squeeze = std::make_shared(param, axes_node, allow_axis_skip); + + EXPECT_EQ(squeeze->get_element_type(), element::f32); 
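    // Editorial sketch, not part of the patch: allow_axis_skip only changes the inferred shape
    // when a selected axis points at a dimension that may or may not be 1. For the
    // {1, 2, -1, 4} / axes {2} rows in the parameter list above, the strict mode keeps
    // {1, 2, 4}, while the relaxed mode has to report a fully dynamic shape because the -1
    // dimension might not be squeezable at runtime. Assuming the elided op type in this hunk
    // is ov::op::v15::Squeeze (the template arguments are truncated here):
    const auto strict_squeeze = std::make_shared<ov::op::v15::Squeeze>(param, axes_node, false);
    const auto relaxed_squeeze = std::make_shared<ov::op::v15::Squeeze>(param, axes_node, true);
    // strict_squeeze->get_output_partial_shape(0)  matches the allow_axis_skip=false expectation,
    // relaxed_squeeze->get_output_partial_shape(0) becomes PartialShape::dynamic() for the -1 case.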
+ EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + EXPECT_EQ(squeeze->get_allow_axis_skip(), allow_axis_skip); +} + +TEST(SqueezeDynamicAxis, squeeze_dynamic_non_const_single_axis) { + auto p_shape = PartialShape{1, 2, -1, 4}; + auto exp_shape = PartialShape::dynamic(); + auto allow_axis_skip = true; + + auto param = make_shared(element::f32, p_shape); + auto axes_node = make_shared(element::i32, Shape{1}); + const auto squeeze = std::make_shared(param, axes_node, allow_axis_skip); + + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + EXPECT_EQ(squeeze->get_allow_axis_skip(), allow_axis_skip); +} + +TEST(SqueezeDynamicAxis, squeeze_dynamic_non_const_axes) { + auto p_shape = PartialShape{1, 2, -1, 4}; + auto exp_shape = PartialShape::dynamic(); + auto allow_axis_skip = true; + + auto param = make_shared(element::f32, p_shape); + auto axes_node = make_shared(element::i32, PartialShape{-1}); + const auto squeeze = std::make_shared(param, axes_node, allow_axis_skip); + + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + EXPECT_EQ(squeeze->get_allow_axis_skip(), allow_axis_skip); +} + +} // namespace diff --git a/src/core/tests/type_prop/stft.cpp b/src/core/tests/type_prop/stft.cpp index 60fe9597032714..2969af4e5a43bd 100644 --- a/src/core/tests/type_prop/stft.cpp +++ b/src/core/tests/type_prop/stft.cpp @@ -104,6 +104,14 @@ INSTANTIATE_TEST_SUITE_P( type_prop_stft_shape, TypePropSTFTTestP, testing::Values( + std::make_tuple(PartialShape{16}, PartialShape{16}, 16, 16, true, PartialShape{9, 1, 2}), + std::make_tuple(PartialShape{48}, PartialShape{16}, 16, 16, false, PartialShape{3, 9, 2}), + std::make_tuple(PartialShape{56}, PartialShape{7}, 11, 3, false, PartialShape{16, 6, 2}), + std::make_tuple(PartialShape{56}, PartialShape{7}, 11, 3, true, PartialShape{6, 16, 2}), + std::make_tuple(PartialShape{48}, PartialShape{8}, 16, 4, true, PartialShape{9, 9, 2}), + std::make_tuple(PartialShape{{48, 56}}, PartialShape{7}, 11, 3, true, PartialShape{6, {13, 16}, 2}), + std::make_tuple(PartialShape{-1}, PartialShape{7}, 11, 3, true, PartialShape{6, {1, -1}, 2}), + std::make_tuple(PartialShape{1, 16}, PartialShape{16}, 16, 16, true, PartialShape{1, 9, 1, 2}), std::make_tuple(PartialShape{1, 48}, PartialShape{16}, 16, 16, true, PartialShape{1, 9, 3, 2}), std::make_tuple(PartialShape{1, 48}, PartialShape{16}, 16, 16, false, PartialShape{1, 3, 9, 2}), std::make_tuple(PartialShape{2, 48}, PartialShape{8}, 16, 4, true, PartialShape{2, 9, 9, 2}), @@ -138,16 +146,16 @@ TEST_F(TypePropSTFTTest, signal_incompatible_shape) { const auto frame_size = std::make_shared(element::i64, PartialShape{}); const auto frame_step = std::make_shared(element::i64, PartialShape{}); { - const auto signal = std::make_shared(element::f32, PartialShape{48}); + const auto signal = std::make_shared(element::f32, PartialShape{}); OV_EXPECT_THROW(std::ignore = make_op(signal, window, frame_size, frame_step, transform_frames), NodeValidationFailure, - HasSubstr("The shape of signal must be 2D [batch, signal_size]")); + HasSubstr("The shape of signal must be 1D [signal_size] or 2D [batch, signal_size]")); } { const auto signal = std::make_shared(element::f32, PartialShape{-1, 4, 48}); OV_EXPECT_THROW(std::ignore = make_op(signal, window, frame_size, frame_step, transform_frames), NodeValidationFailure, - HasSubstr("The shape of signal must be 2D [batch, signal_size]")); + HasSubstr("The shape of 
signal must be 1D [signal_size] or 2D [batch, signal_size]")); } } diff --git a/src/core/tests/visitors/op/sorted_search.cpp b/src/core/tests/visitors/op/sorted_search.cpp index 860c9528d0e9aa..10d544527f3714 100644 --- a/src/core/tests/visitors/op/sorted_search.cpp +++ b/src/core/tests/visitors/op/sorted_search.cpp @@ -22,7 +22,7 @@ TEST(attributes, search_sorted_op) { auto g_op = ov::as_type_ptr(builder.create()); // attribute count - const auto expected_attr_count = 1; + const auto expected_attr_count = 2; EXPECT_EQ(builder.get_value_map_size(), expected_attr_count); // space_to_depth attributes diff --git a/src/core/tests/visitors/op/squeeze.cpp b/src/core/tests/visitors/op/squeeze.cpp index 6eb1674b26329a..be596a5fb1dc67 100644 --- a/src/core/tests/visitors/op/squeeze.cpp +++ b/src/core/tests/visitors/op/squeeze.cpp @@ -6,7 +6,16 @@ #include "unary_ops.hpp" +namespace v0 { using Types = ::testing::Types, UnaryOperatorType>; INSTANTIATE_TYPED_TEST_SUITE_P(visitor_without_attribute, UnaryOperatorVisitor, Types, UnaryOperatorTypeName); +} // namespace v0 + +namespace v15 { +using Types = ::testing::Types, + UnaryOperatorTypeWithAttribute>; + +INSTANTIATE_TYPED_TEST_SUITE_P(visitor_single_attribute, UnaryOperatorVisitor, Types, UnaryOperatorTypeName); +} // namespace v15 diff --git a/src/core/tests/visitors/op/unary_ops.hpp b/src/core/tests/visitors/op/unary_ops.hpp index 3bef2429983e9f..6cc2afda62e253 100644 --- a/src/core/tests/visitors/op/unary_ops.hpp +++ b/src/core/tests/visitors/op/unary_ops.hpp @@ -9,12 +9,17 @@ #include "openvino/op/parameter.hpp" #include "visitors/visitors.hpp" -template +template class UnaryOperatorType { public: using op_type = T; static constexpr ov::element::Type_t element_type = ELEMENT_TYPE; + static constexpr int expected_attr_count = ATTRIBUTES_COUNT; }; + +template +using UnaryOperatorTypeWithAttribute = UnaryOperatorType; + template class UnaryOperatorVisitor : public testing::Test {}; @@ -43,7 +48,7 @@ TYPED_TEST_P(UnaryOperatorVisitor, No_Attribute_4D) { EXPECT_NO_THROW(auto g_op_func = ov::as_type_ptr(builder.create())); - const auto expected_attr_count = 0; + const auto expected_attr_count = TypeParam::expected_attr_count; EXPECT_EQ(builder.get_value_map_size(), expected_attr_count); } diff --git a/src/frontends/common/include/openvino/frontend/extension/op.hpp b/src/frontends/common/include/openvino/frontend/extension/op.hpp index 4198c411082e42..55fa919447406a 100644 --- a/src/frontends/common/include/openvino/frontend/extension/op.hpp +++ b/src/frontends/common/include/openvino/frontend/extension/op.hpp @@ -25,7 +25,7 @@ inline const ov::OpSet& get_opset_by_name(const std::string& opset_name) { if (opsets.find(opset_name) != opsets.end()) return opsets.at(opset_name)(); if (opset_name.empty() || opset_name == "latest") { - return ov::get_opset14(); // TODO (ticket 138273): Update at the end of the opset15 development + return ov::get_opset15(); } else { FRONT_END_GENERAL_CHECK(false, "Unsupported opset name: ", opset_name); } diff --git a/src/frontends/ir/src/frontend.cpp b/src/frontends/ir/src/frontend.cpp index db979a35d932af..c5e137e1decc89 100644 --- a/src/frontends/ir/src/frontend.cpp +++ b/src/frontends/ir/src/frontend.cpp @@ -168,15 +168,21 @@ InputModel::Ptr FrontEnd::load_impl(const std::vector& variants) const return exts; }; - auto create_input_model = [&]() -> std::shared_ptr { + auto create_input_model = [&](std::string weights_path) -> std::shared_ptr { if (provided_model_stream) { - return std::make_shared(*provided_model_stream, 
weights, create_extensions_map()); + return std::make_shared(*provided_model_stream, + weights, + create_extensions_map(), + std::move(weights_path)); } else if (local_model_stream.is_open()) { - auto input_model = std::make_shared(local_model_stream, weights, create_extensions_map()); + auto input_model = std::make_shared(local_model_stream, + weights, + create_extensions_map(), + std::move(weights_path)); local_model_stream.close(); return input_model; } else if (model_buf) { - return std::make_shared(model_buf, weights, create_extensions_map()); + return std::make_shared(model_buf, weights, create_extensions_map(), std::move(weights_path)); } return nullptr; }; @@ -278,7 +284,7 @@ InputModel::Ptr FrontEnd::load_impl(const std::vector& variants) const } } - return create_input_model(); + return create_input_model(ov::util::path_to_string(weights_path)); } std::shared_ptr FrontEnd::convert(const InputModel::Ptr& model) const { diff --git a/src/frontends/ir/src/input_model.cpp b/src/frontends/ir/src/input_model.cpp index 6c59617c69a48d..b4d9ef164e994c 100644 --- a/src/frontends/ir/src/input_model.cpp +++ b/src/frontends/ir/src/input_model.cpp @@ -205,13 +205,16 @@ class InputModel::InputModelIRImpl { std::unordered_map m_opsets; pugi::xml_node m_root; pugi::xml_document m_xml_doc; + std::string m_weights_path; public: InputModelIRImpl(std::istream& model, const std::shared_ptr& weights, - const std::unordered_map& extensions) + const std::unordered_map& extensions, + std::string weights_path) : m_weights(weights), - m_extensions(extensions) { + m_extensions(extensions), + m_weights_path(std::move(weights_path)) { pugi::xml_parse_result res = m_xml_doc.load(model); OPENVINO_ASSERT(res.status == pugi::status_ok, res.description(), " at offset ", res.offset); init_opset(); @@ -219,9 +222,11 @@ class InputModel::InputModelIRImpl { InputModelIRImpl(const std::shared_ptr& model, const std::shared_ptr& weights, - const std::unordered_map& extensions) + const std::unordered_map& extensions, + std::string weights_path) : m_weights(weights), - m_extensions(extensions) { + m_extensions(extensions), + m_weights_path(std::move(weights_path)) { auto res = m_xml_doc.load_buffer(model->get_ptr(), model->size(), pugi::parse_default, pugi::encoding_utf8); OPENVINO_ASSERT(res.status == pugi::status_ok, res.description(), " at offset ", res.offset); init_opset(); @@ -240,14 +245,16 @@ class InputModel::InputModelIRImpl { InputModel::InputModel(std::istream& model, const std::shared_ptr& weights, - const std::unordered_map& extensions) { - _impl = std::make_shared(model, weights, extensions); + const std::unordered_map& extensions, + std::string weights_path) { + _impl = std::make_shared(model, weights, extensions, std::move(weights_path)); } InputModel::InputModel(const std::shared_ptr& model, const std::shared_ptr& weights, - const std::unordered_map& extensions) { - _impl = std::make_shared(model, weights, extensions); + const std::unordered_map& extensions, + std::string weights_path) { + _impl = std::make_shared(model, weights, extensions, std::move(weights_path)); } std::shared_ptr InputModel::convert() { @@ -263,6 +270,8 @@ std::shared_ptr InputModel::InputModelIRImpl::convert() { std::shared_ptr model; visitor.on_attribute("net", model); model->get_rt_info()["version"] = int64_t(version); + if (!m_weights_path.empty()) + model->get_rt_info()["__weights_path"] = m_weights_path; parse_pre_process(m_root, m_weights, model); return model; diff --git a/src/frontends/ir/src/input_model.hpp 
b/src/frontends/ir/src/input_model.hpp index 331092749bbeb9..a9ad1224c6ca3a 100644 --- a/src/frontends/ir/src/input_model.hpp +++ b/src/frontends/ir/src/input_model.hpp @@ -22,11 +22,13 @@ class InputModel : public ov::frontend::InputModel { public: InputModel(std::istream& stream, const std::shared_ptr& weights, - const std::unordered_map& extensions); + const std::unordered_map& extensions, + std::string weights_path = {}); InputModel(const std::shared_ptr& model_buf, const std::shared_ptr& weights, - const std::unordered_map& extensions); + const std::unordered_map& extensions, + std::string weights_path = {}); std::shared_ptr convert(); }; diff --git a/src/frontends/jax/src/op/erfc.cpp b/src/frontends/jax/src/op/erfc.cpp new file mode 100644 index 00000000000000..5a38577f868d35 --- /dev/null +++ b/src/frontends/jax/src/op/erfc.cpp @@ -0,0 +1,33 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/frontend/jax/node_context.hpp" +#include "openvino/op/erf.hpp" +#include "openvino/op/subtract.hpp" +#include "utils.hpp" + +using namespace std; +using namespace ov; +using namespace ov::op; + +namespace ov { +namespace frontend { +namespace jax { +namespace op { + +OutputVector translate_erfc(const NodeContext& context) { + num_inputs_check(context, 1, 1); + auto x = context.get_input(0); + + // create const one of the same type as x + auto const_one = create_same_type_const_scalar(x, 1); + Output res = make_shared(x); + res = make_shared(const_one, res); + return {res}; +}; + +} // namespace op +} // namespace jax +} // namespace frontend +} // namespace ov diff --git a/src/frontends/jax/src/op_table.cpp b/src/frontends/jax/src/op_table.cpp index 500226594fea13..98f22452c5afab 100644 --- a/src/frontends/jax/src/op_table.cpp +++ b/src/frontends/jax/src/op_table.cpp @@ -45,6 +45,7 @@ OP_CONVERTER(translate_convert); OP_CONVERTER(translate_convolution); OP_CONVERTER(translate_copy); OP_CONVERTER(translate_dot_general); +OP_CONVERTER(translate_erfc); OP_CONVERTER(translate_integer_pow); OP_T_CONVERTER(translate_reduce_op); OP_CONVERTER(translate_reduce_window_max); @@ -72,6 +73,7 @@ const std::map get_supported_ops_jaxpr() { {"dot_general", op::translate_dot_general}, {"eq", op::translate_binary_op}, {"erf", op::translate_1to1_match_1_input}, + {"erfc", op::translate_erfc}, {"exp", op::translate_1to1_match_1_input}, {"ge", op::translate_binary_op}, {"gt", op::translate_binary_op}, diff --git a/src/frontends/onnx/frontend/src/core/tensor.cpp b/src/frontends/onnx/frontend/src/core/tensor.cpp index 4f8f54e2d83690..b23f6c55253ac1 100644 --- a/src/frontends/onnx/frontend/src/core/tensor.cpp +++ b/src/frontends/onnx/frontend/src/core/tensor.cpp @@ -82,18 +82,11 @@ std::vector Tensor::get_data() const { if (m_tensor_proto->has_raw_data()) { return detail::__get_raw_data(m_tensor_proto->raw_data(), m_tensor_proto->data_type()); } -#ifdef ONNX_VERSION_116 if (m_tensor_proto->data_type() == TensorProto_DataType::TensorProto_DataType_INT8 || m_tensor_proto->data_type() == TensorProto_DataType::TensorProto_DataType_INT4) { return detail::__get_data(m_tensor_proto->int32_data()); } ONNX_INVALID_DATA_TYPE(m_tensor_proto->data_type(), "INT4, INT8, raw data"); -#else - if (m_tensor_proto->data_type() == TensorProto_DataType::TensorProto_DataType_INT8) { - return detail::__get_data(m_tensor_proto->int32_data()); - } - ONNX_INVALID_DATA_TYPE(m_tensor_proto->data_type(), "INT8, raw data"); -#endif } template <> @@ -146,18 +139,11 @@ std::vector 
Tensor::get_data() const { if (m_tensor_proto->has_raw_data()) { return detail::__get_raw_data(m_tensor_proto->raw_data(), m_tensor_proto->data_type()); } -#ifdef ONNX_VERSION_116 if (m_tensor_proto->data_type() == TensorProto_DataType::TensorProto_DataType_UINT8 || m_tensor_proto->data_type() == TensorProto_DataType::TensorProto_DataType_UINT4) { return detail::__get_data(m_tensor_proto->int32_data()); } ONNX_INVALID_DATA_TYPE(m_tensor_proto->data_type(), "UINT4, UINT8, raw data"); -#else - if (m_tensor_proto->data_type() == TensorProto_DataType::TensorProto_DataType_UINT8) { - return detail::__get_data(m_tensor_proto->int32_data()); - } - ONNX_INVALID_DATA_TYPE(m_tensor_proto->data_type(), "UINT8, raw data"); -#endif } template <> diff --git a/src/frontends/onnx/frontend/src/core/tensor.hpp b/src/frontends/onnx/frontend/src/core/tensor.hpp index ae6fe28754b4e5..af4d299f9d45e7 100644 --- a/src/frontends/onnx/frontend/src/core/tensor.hpp +++ b/src/frontends/onnx/frontend/src/core/tensor.hpp @@ -65,10 +65,8 @@ class Tensor { enum class Type { undefined = TensorProto_DataType::TensorProto_DataType_UNDEFINED, float32 = TensorProto_DataType::TensorProto_DataType_FLOAT, -#ifdef ONNX_VERSION_116 uint4 = TensorProto_DataType::TensorProto_DataType_UINT4, int4 = TensorProto_DataType::TensorProto_DataType_INT4, -#endif uint8 = TensorProto_DataType::TensorProto_DataType_UINT8, int8 = TensorProto_DataType::TensorProto_DataType_INT8, uint16 = TensorProto_DataType::TensorProto_DataType_UINT16, @@ -146,10 +144,8 @@ class Tensor { return ov::element::f16; case TensorProto_DataType::TensorProto_DataType_DOUBLE: return ov::element::f64; -#ifdef ONNX_VERSION_116 case TensorProto_DataType::TensorProto_DataType_INT4: return ov::element::i4; -#endif case TensorProto_DataType::TensorProto_DataType_INT8: return ov::element::i8; case TensorProto_DataType::TensorProto_DataType_INT16: @@ -158,10 +154,8 @@ class Tensor { return ov::element::i32; case TensorProto_DataType::TensorProto_DataType_INT64: return ov::element::i64; -#ifdef ONNX_VERSION_116 case TensorProto_DataType::TensorProto_DataType_UINT4: return ov::element::u4; -#endif case TensorProto_DataType::TensorProto_DataType_UINT8: return ov::element::u8; case TensorProto_DataType::TensorProto_DataType_UINT16: @@ -205,10 +199,8 @@ class Tensor { return make_ov_constant(ov::element::f16); case TensorProto_DataType::TensorProto_DataType_DOUBLE: return make_ov_constant(ov::element::f64); -#ifdef ONNX_VERSION_116 case TensorProto_DataType::TensorProto_DataType_INT4: return make_ov_constant(ov::element::i4); -#endif case TensorProto_DataType::TensorProto_DataType_INT8: return make_ov_constant(ov::element::i8); case TensorProto_DataType::TensorProto_DataType_INT16: @@ -217,10 +209,8 @@ class Tensor { return make_ov_constant(ov::element::i32); case TensorProto_DataType::TensorProto_DataType_INT64: return make_ov_constant(ov::element::i64); -#ifdef ONNX_VERSION_116 case TensorProto_DataType::TensorProto_DataType_UINT4: return make_ov_constant(ov::element::u4); -#endif case TensorProto_DataType::TensorProto_DataType_UINT8: return make_ov_constant(ov::element::u8); case TensorProto_DataType::TensorProto_DataType_UINT16: @@ -238,17 +228,10 @@ class Tensor { case TensorProto_DataType::TensorProto_DataType_STRING: return make_ov_constant(ov::element::string); default: -#ifdef ONNX_VERSION_116 ONNX_UNSUPPORTED_DATA_TYPE( m_tensor_proto->data_type(), "BOOL, BFLOAT16, FLOAT8E4M3FN, FLOAT8E5M2, FLOAT, FLOAT16, DOUBLE, INT4, INT8, INT16, INT32, INT64, " "UINT4, UINT8, UINT16, 
UINT32, UINT64, STRING"); -#else - ONNX_UNSUPPORTED_DATA_TYPE( - m_tensor_proto->data_type(), - "BOOL, BFLOAT16, FLOAT8E4M3FN, FLOAT8E5M2, FLOAT, FLOAT16, DOUBLE, INT8, INT16, INT32, INT64, " - "UINT8, UINT16, UINT32, UINT64, STRING"); -#endif } } diff --git a/src/frontends/onnx/frontend/src/frontend.cpp b/src/frontends/onnx/frontend/src/frontend.cpp index 8afc9b661ec28d..5ad28be3654422 100644 --- a/src/frontends/onnx/frontend/src/frontend.cpp +++ b/src/frontends/onnx/frontend/src/frontend.cpp @@ -8,7 +8,9 @@ #endif #include -#ifndef OV_PROTOBUF_ABSL_IS_USED +#ifdef OV_PROTOBUF_ABSL_IS_USED +# include +#else # include #endif @@ -47,7 +49,9 @@ ONNX_FRONTEND_C_API void* get_front_end_data() { }; #ifndef OPENVINO_DEBUG_ENABLE // disable protobuf logging -# ifndef OV_PROTOBUF_ABSL_IS_USED +# ifdef OV_PROTOBUF_ABSL_IS_USED + absl::SetGlobalVLogLevel(0); +# else google::protobuf::SetLogHandler(nullptr); # endif #endif diff --git a/src/frontends/onnx/frontend/src/op/com.microsoft/quick_gelu.cpp b/src/frontends/onnx/frontend/src/op/com.microsoft/quick_gelu.cpp new file mode 100644 index 00000000000000..c4144be9b5ff44 --- /dev/null +++ b/src/frontends/onnx/frontend/src/op/com.microsoft/quick_gelu.cpp @@ -0,0 +1,58 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "core/operator_set.hpp" +#include "exceptions.hpp" +#include "openvino/frontend/exception.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/sigmoid.hpp" +#include "utils/common.hpp" + +using namespace ov::op; + +namespace ov { +namespace frontend { +namespace onnx { +namespace com_microsoft { +namespace opset_1 { +ov::OutputVector quick_gelu(const ov::frontend::onnx::Node& node) { + // Original Documentation: + // https://github.com/microsoft/onnxruntime/blob/main/docs/ContribOperators.md#com.microsoft.QuickGelu + // Goal: Compute x * Sigmoid(alpha * x) + common::default_op_checks(node, 1); + + const auto inputs = node.get_ov_inputs(); + const auto& x = inputs[0]; + + // Constrain input type to float16, float, double (f64), bfloat16 + auto element_type = x.get_element_type(); + CHECK_VALID_NODE(node, + element_type == ov::element::f16 || element_type == ov::element::f32 || + element_type == ov::element::f64 || element_type == ov::element::bf16, + "Unsupported input x type, accepted FP16, FP32, FP64, BFP16 but got: ", + element_type); + + // Get attribute from node + const float alpha = node.get_attribute_value("alpha"); + + // Numpy broadcasting rule is automatically applied with mismatched shapes according to: + // https://docs.openvino.ai/2022.3/openvino_docs_ops_arithmetic_Multiply_1.html "Tensor with dimension of size 1 + // will be implicitly broadcasted to match the size of the second tensor." 
Convert alpha to tensor with size 1 + const auto alpha_tensor = std::make_shared(ov::element::f32, Shape{1}, alpha); + + auto alpha_x = std::make_shared(alpha_tensor, x); + auto sig_alpha_x = std::make_shared(alpha_x); + auto result = std::make_shared(x, sig_alpha_x); + + return {result}; +} // func end + +ONNX_OP("QuickGelu", OPSET_SINCE(1), com_microsoft::opset_1::quick_gelu, MICROSOFT_DOMAIN); + +} // namespace opset_1 +} // namespace com_microsoft +} // namespace onnx +} // namespace frontend +} // namespace ov diff --git a/src/frontends/onnx/frontend/src/op/dequantize_linear.cpp b/src/frontends/onnx/frontend/src/op/dequantize_linear.cpp index b09bc73467bc10..d7b5214f3e53f4 100644 --- a/src/frontends/onnx/frontend/src/op/dequantize_linear.cpp +++ b/src/frontends/onnx/frontend/src/op/dequantize_linear.cpp @@ -221,19 +221,8 @@ ov::OutputVector dequantize_linear(const ov::frontend::onnx::Node& node) { FRONT_END_GENERAL_CHECK(src_x.get_partial_shape().is_static(), "DequantizeLinear cannot operate with dynamic shapes of input X"); - const auto& unsqueezed_axes = std::make_shared(ov::element::i64, Shape{1}, std::vector{1}); - - if (inputs.size() > 2) { - zp = inputs[2]; - if (zp.get_element_type() != scale.get_element_type()) { - zp = std::make_shared(zp, scale); - } - zp = std::make_shared(zp, unsqueezed_axes); - } - const auto axis = node.get_attribute_value("axis", 1); const auto block_size = static_cast(node.get_attribute_value("block_size", 0)); - const auto scale_type = scale.get_element_type(); FRONT_END_GENERAL_CHECK(axis == 0, "Axis != 0 isn't supported"); FRONT_END_GENERAL_CHECK(block_size > 0, "block_size must be greater than zero"); @@ -241,16 +230,30 @@ ov::OutputVector dequantize_linear(const ov::frontend::onnx::Node& node) { src_x.get_shape()[0] % block_size == 0, "DequantizeLinear doesn't support case when first dimension of X cannot be divided by block_size"); - const auto& x = src_x.get_element_type() == scale_type ? src_x : std::make_shared(src_x, scale); + ov::Output broadcastable_x = op::util::reshape( + src_x, + Shape{static_cast(src_x.get_shape()[0]) / block_size, block_size, src_x.get_shape()[1]}); + + const auto& unsqueezed_axes = std::make_shared(ov::element::i64, Shape{1}, std::vector{1}); + + const auto scale_type = scale.get_element_type(); + if (inputs.size() > 2) { + zp = inputs[2]; + if (zp.get_element_type() != scale.get_element_type()) { + zp = std::make_shared(zp, scale_type); + } + zp = std::make_shared(zp, unsqueezed_axes); + } + + const auto& x = src_x.get_element_type() == scale_type ? 
broadcastable_x + : std::make_shared(broadcastable_x, scale_type); // For further broadcasting scales and zp - reshape input to a shape [x.shape[0]/block_size, block_size, x.shape[1]] - ov::Output broadcastable_x = - op::util::reshape(x, Shape{static_cast(x.get_shape()[0]) / block_size, block_size, x.get_shape()[1]}); // Adding additional dimension for broadcasting scale = std::make_shared(scale, unsqueezed_axes); if (zp.get_node_shared_ptr()) { - broadcastable_x = std::make_shared(broadcastable_x, zp); + broadcastable_x = std::make_shared(x, zp); } const auto& scaled_x = std::make_shared(broadcastable_x, scale); diff --git a/src/frontends/onnx/frontend/src/utils/common.cpp b/src/frontends/onnx/frontend/src/utils/common.cpp index 66fdcf1c7830c7..46c7be75bbdd66 100644 --- a/src/frontends/onnx/frontend/src/utils/common.cpp +++ b/src/frontends/onnx/frontend/src/utils/common.cpp @@ -42,10 +42,8 @@ const ov::element::Type& get_ov_element_type(int64_t onnx_type) { return ov::element::f16; case TensorProto_DataType::TensorProto_DataType_FLOAT: return ov::element::f32; -#ifdef ONNX_VERSION_116 case TensorProto_DataType::TensorProto_DataType_INT4: return ov::element::i4; -#endif case TensorProto_DataType::TensorProto_DataType_INT8: return ov::element::i8; case TensorProto_DataType::TensorProto_DataType_INT16: @@ -54,10 +52,8 @@ const ov::element::Type& get_ov_element_type(int64_t onnx_type) { return ov::element::i32; case TensorProto_DataType::TensorProto_DataType_INT64: return ov::element::i64; -#ifdef ONNX_VERSION_116 case TensorProto_DataType::TensorProto_DataType_UINT4: return ov::element::u4; -#endif case TensorProto_DataType::TensorProto_DataType_UINT8: return ov::element::u8; case TensorProto_DataType::TensorProto_DataType_UINT16: @@ -77,15 +73,9 @@ const ov::element::Type& get_ov_element_type(int64_t onnx_type) { case TensorProto_DataType::TensorProto_DataType_STRING: return ov::element::string; } -#ifdef ONNX_VERSION_116 ONNX_UNSUPPORTED_DATA_TYPE(onnx_type, "BOOL, BFLOAT16, FLOAT8E4M3FN, FLOAT8E5M2, FLOAT, FLOAT16, DOUBLE, INT4, INT8, INT16, " "INT32, INT64, UINT4, UINT8, UINT16, UINT32, UINT64, STRING, UNDEFINED"); -#else - ONNX_UNSUPPORTED_DATA_TYPE(onnx_type, - "BOOL, BFLOAT16, FLOAT8E4M3FN, FLOAT8E5M2, FLOAT, FLOAT16, DOUBLE, INT8, INT16, " - "INT32, INT64, UINT8, UINT16, UINT32, UINT64, STRING, UNDEFINED"); -#endif } void default_op_checks(const Node& node, size_t min_inputs_size) { diff --git a/src/frontends/onnx/onnx_common/CMakeLists.txt b/src/frontends/onnx/onnx_common/CMakeLists.txt index a743c5ac40a0dd..d63bce4083087c 100644 --- a/src/frontends/onnx/onnx_common/CMakeLists.txt +++ b/src/frontends/onnx/onnx_common/CMakeLists.txt @@ -35,18 +35,3 @@ ov_link_system_libraries(${TARGET_NAME} PUBLIC onnx_proto onnx) ov_add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME}) ov_install_static_lib(${TARGET_NAME} ${OV_CPACK_COMP_CORE}) - -# Temporary solution until vcpkg doesn't have fresh ONNX, -# trying determine used version of ONNX to enable modern functionality -find_package(ONNX 1.16.0 QUIET COMPONENTS onnx onnx_proto NO_MODULE) -if(ONNX_FOUND) - target_compile_definitions(${TARGET_NAME} PUBLIC ONNX_VERSION_116) -else() - if(EXISTS "${CMAKE_SOURCE_DIR}/thirdparty/onnx/onnx/VERSION_NUMBER") - file(READ "${CMAKE_SOURCE_DIR}/thirdparty/onnx/onnx/VERSION_NUMBER" ONNX_VERSION) - string(STRIP "${ONNX_VERSION}" ONNX_VERSION) - if((ONNX_VERSION GREATER "1.16.0") OR (ONNX_VERSION EQUAL "1.16.0")) - target_compile_definitions(${TARGET_NAME} PUBLIC ONNX_VERSION_116) - endif() 
- endif() -endif() diff --git a/src/frontends/onnx/onnx_common/src/utils.cpp b/src/frontends/onnx/onnx_common/src/utils.cpp index 6ec409c5671458..b83dea1b4cfd99 100644 --- a/src/frontends/onnx/onnx_common/src/utils.cpp +++ b/src/frontends/onnx/onnx_common/src/utils.cpp @@ -30,10 +30,8 @@ size_t get_onnx_data_size(int32_t onnx_type) { return sizeof(ov::float8_e4m3); case TensorProto_DataType_FLOAT8E5M2: return sizeof(ov::float8_e5m2); -#ifdef ONNX_VERSION_116 case TensorProto_DataType_INT4: return sizeof(int8_t); -#endif case TensorProto_DataType_INT8: return sizeof(int8_t); case TensorProto_DataType_INT16: @@ -42,10 +40,8 @@ size_t get_onnx_data_size(int32_t onnx_type) { return sizeof(int32_t); case TensorProto_DataType_INT64: return sizeof(int64_t); -#ifdef ONNX_VERSION_116 case TensorProto_DataType_UINT4: return sizeof(uint8_t); -#endif case TensorProto_DataType_UINT8: return sizeof(uint8_t); case TensorProto_DataType_UINT16: @@ -66,16 +62,12 @@ const std::map OV_2_ONNX_TYPES = { {ov::element::Type_t::f16, TensorProto_DataType::TensorProto_DataType_FLOAT16}, {ov::element::Type_t::f32, TensorProto_DataType::TensorProto_DataType_FLOAT}, {ov::element::Type_t::f64, TensorProto_DataType::TensorProto_DataType_DOUBLE}, -#ifdef ONNX_VERSION_116 {ov::element::Type_t::i4, TensorProto_DataType::TensorProto_DataType_INT4}, -#endif {ov::element::Type_t::i8, TensorProto_DataType::TensorProto_DataType_INT8}, {ov::element::Type_t::i16, TensorProto_DataType::TensorProto_DataType_INT16}, {ov::element::Type_t::i32, TensorProto_DataType::TensorProto_DataType_INT32}, {ov::element::Type_t::i64, TensorProto_DataType::TensorProto_DataType_INT64}, -#ifdef ONNX_VERSION_116 {ov::element::Type_t::u4, TensorProto_DataType::TensorProto_DataType_UINT4}, -#endif {ov::element::Type_t::u8, TensorProto_DataType::TensorProto_DataType_UINT8}, {ov::element::Type_t::u16, TensorProto_DataType::TensorProto_DataType_UINT16}, {ov::element::Type_t::u32, TensorProto_DataType::TensorProto_DataType_UINT32}, diff --git a/src/frontends/onnx/tests/CMakeLists.txt b/src/frontends/onnx/tests/CMakeLists.txt index 9b928773b7d65a..f508fdb4c1a903 100644 --- a/src/frontends/onnx/tests/CMakeLists.txt +++ b/src/frontends/onnx/tests/CMakeLists.txt @@ -134,21 +134,6 @@ target_compile_definitions(ov_onnx_frontend_tests set(ONNX_OPSET_VERSION 17 CACHE INTERNAL "Supported version of ONNX operator set") target_compile_definitions(ov_onnx_frontend_tests PRIVATE ONNX_OPSET_VERSION=${ONNX_OPSET_VERSION}) -# Temporary solution until vcpkg doesn't have fresh ONNX, -# trying determine used version of ONNX to enable modern functionality -find_package(ONNX 1.16.0 QUIET COMPONENTS onnx onnx_proto NO_MODULE) -if(ONNX_FOUND) - target_compile_definitions(ov_onnx_frontend_tests PRIVATE ONNX_VERSION_116) -else() - if(EXISTS "${CMAKE_SOURCE_DIR}/thirdparty/onnx/onnx/VERSION_NUMBER") - file(READ "${CMAKE_SOURCE_DIR}/thirdparty/onnx/onnx/VERSION_NUMBER" ONNX_VERSION) - string(STRIP "${ONNX_VERSION}" ONNX_VERSION) - if((ONNX_VERSION GREATER "1.16.0") OR (ONNX_VERSION EQUAL "1.16.0")) - target_compile_definitions(ov_onnx_frontend_tests PRIVATE ONNX_VERSION_116) - endif() - endif() -endif() - if(ONNX_TESTS_DEPENDENCIES) add_dependencies(ov_onnx_frontend_tests ${ONNX_TESTS_DEPENDENCIES}) endif() diff --git a/src/frontends/onnx/tests/__init__.py b/src/frontends/onnx/tests/__init__.py index fb29faa38b46ec..ef8cebfa361e3f 100644 --- a/src/frontends/onnx/tests/__init__.py +++ b/src/frontends/onnx/tests/__init__.py @@ -182,3 +182,6 @@ def xfail_test(reason="Mark the test as 
expected to fail", strict=True): xfail_issue_139936 = xfail_test(reason = "MaxPool accuracy fails") xfail_issue_139937 = xfail_test(reason = "GroupNorm, QLinearMatMul, DequantizeLinear translation failed") xfail_issue_139938 = xfail_test(reason = "QLinearMatMul accuracy fails") + +# ONNX 1.17 +skip_issue_119896 = pytest.mark.skip(reason="Unsupported element type: FLOAT8") diff --git a/src/frontends/onnx/tests/ci_utils/onnxruntime/version b/src/frontends/onnx/tests/ci_utils/onnxruntime/version index 85c399f04d78df..774958e19512de 100644 --- a/src/frontends/onnx/tests/ci_utils/onnxruntime/version +++ b/src/frontends/onnx/tests/ci_utils/onnxruntime/version @@ -1 +1 @@ -rel-1.18.1 +rel-1.19.2 diff --git a/src/frontends/onnx/tests/models/com.microsoft/quick_gelu.prototxt b/src/frontends/onnx/tests/models/com.microsoft/quick_gelu.prototxt new file mode 100644 index 00000000000000..4fb110fd485833 --- /dev/null +++ b/src/frontends/onnx/tests/models/com.microsoft/quick_gelu.prototxt @@ -0,0 +1,52 @@ +ir_version: 3 +producer_name: "OpenVINO ONNX Frontend" +graph { + name: "test_quick_gelu" + node { + input: "X" + output: "Y" + op_type: "QuickGelu" + attribute { + name: "alpha" + f: 0.9974269270896912 + type: FLOAT + } + domain: "com.microsoft" + } + input { + name: "X" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 5 + } + } + } + } + } + output { + name: "Y" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 5 + } + } + } + } + } +} +opset_import { + domain: "com.microsoft" + version: 1 +} \ No newline at end of file diff --git a/src/frontends/onnx/tests/onnx_import.in.cpp b/src/frontends/onnx/tests/onnx_import.in.cpp index bc27a759d415a0..c57cb2babc569b 100644 --- a/src/frontends/onnx/tests/onnx_import.in.cpp +++ b/src/frontends/onnx/tests/onnx_import.in.cpp @@ -159,7 +159,6 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_bool_init_raw) { test_case.run(); } -#ifdef ONNX_VERSION_116 OPENVINO_TEST(${BACKEND_NAME}, onnx_int4_const) { auto model = convert_model("int4_const.onnx"); @@ -195,7 +194,6 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_uint4_input) { test_case.run(); } -#endif OPENVINO_TEST(${BACKEND_NAME}, onnx_model_add_abc_initializers) { auto model = convert_model("add_abc_initializers.onnx"); diff --git a/src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp b/src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp index da8189926a4546..900fc025d8d9ab 100644 --- a/src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp @@ -1330,3 +1330,29 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_com_microsoft_matmulnbits_3x17) { } test_case.run(); } + +OPENVINO_TEST(${BACKEND_NAME}, onnx_com_microsoft_quickgelu) { + const auto model = convert_model("com.microsoft/quick_gelu.onnx"); + auto test_case = ov::test::TestCase(model, s_device); + + const std::vector input_X{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + const std::vector output{0.7305524f, + 1.7605114f, + 2.8566725f, + 3.9273243f, + 4.9661055f, + 5.984934f, + 6.9935064f, + 7.997261f, + 8.998864f, + 9.999535f}; + + test_case.add_input(Shape{2, 5}, input_X); + test_case.add_expected_output(Shape{2, 5}, output); + + if (std::string("${BACKEND_NAME}") == std::string("IE_GPU")) { + test_case.run_with_tolerance_as_fp(0.0001f); + } else { + test_case.run(); + } +} diff --git a/src/frontends/onnx/tests/tests_python/test_backend.py b/src/frontends/onnx/tests/tests_python/test_backend.py index 
ca4f90ed5d94be..39b9788d720af3 100644 --- a/src/frontends/onnx/tests/tests_python/test_backend.py +++ b/src/frontends/onnx/tests/tests_python/test_backend.py @@ -57,6 +57,7 @@ xfail_issue_113506, skip_dynamic_model, xfail_issue_119896, + skip_issue_119896, xfail_issue_119900, xfail_issue_119903, xfail_issue_119906, @@ -246,7 +247,11 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None "OnnxBackendNodeModelTest.test_maxunpool_export_with_output_shape_cpu", "OnnxBackendNodeModelTest.test_maxunpool_export_without_output_shape_cpu", ), - (xfail_issue_38724, "OnnxBackendNodeModelTest.test_resize_tf_crop_and_resize_cpu"), + ( + xfail_issue_38724, + "OnnxBackendNodeModelTest.test_resize_tf_crop_and_resize_cpu", + "OnnxBackendNodeModelTest.test_resize_tf_crop_and_resize_extrapolation_value_cpu" + ), ( xfail_issue_33606, "OnnxBackendNodeModelTest.test_det_2d_cpu", @@ -454,6 +459,7 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None "OnnxBackendNodeModelTest.test_resize_upsample_sizes_nearest_axes_2_3_cpu", "OnnxBackendNodeModelTest.test_resize_upsample_sizes_nearest_axes_3_2_cpu", "OnnxBackendNodeModelTest.test_resize_upsample_sizes_nearest_not_larger_cpu", + "OnnxBackendNodeModelTest.test_resize_upsample_sizes_nearest_not_smaller_cpu", ), ( xfail_issue_99970, @@ -520,6 +526,13 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None "OnnxBackendNodeModelTest.test_dequantizelinear_e4m3fn_float16_cpu", "OnnxBackendNodeModelTest.test_dequantizelinear_e4m3fn_zero_point_cpu", ), + ( + skip_issue_119896, + "OnnxBackendNodeModelTest.test_cast_no_saturate_FLOAT16_to_FLOAT8E4M3FN_cpu", + "OnnxBackendNodeModelTest.test_cast_no_saturate_FLOAT16_to_FLOAT8E5M2_cpu", + "OnnxBackendNodeModelTest.test_cast_no_saturate_FLOAT_to_FLOAT8E4M3FN_cpu", + "OnnxBackendNodeModelTest.test_cast_no_saturate_FLOAT_to_FLOAT8E5M2_cpu", + ), ( xfail_issue_119900, "OnnxBackendNodeModelTest.test_resize_downsample_scales_linear_half_pixel_symmetric_cpu", @@ -626,6 +639,7 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None skip_misalignment, "OnnxBackendNodeModelTest.test_gelu_default_2_expanded_cpu", "OnnxBackendNodeModelTest.test_reduce_log_sum_exp_empty_set_expanded_cpu", + "OnnxBackendNodeModelTest.test_reduce_max_empty_set_cpu", "OnnxBackendNodeModelTest.test_group_normalization_epsilon_cpu", "OnnxBackendNodeModelTest.test_group_normalization_example_cpu", "OnnxBackendNodeModelTest.test_qlinearmatmul_3D_int8_float16_cpu", diff --git a/src/frontends/paddle/src/frontend.cpp b/src/frontends/paddle/src/frontend.cpp index c6febe08437b5d..163b4d894cb766 100644 --- a/src/frontends/paddle/src/frontend.cpp +++ b/src/frontends/paddle/src/frontend.cpp @@ -10,7 +10,9 @@ #endif #include -#ifndef OV_PROTOBUF_ABSL_IS_USED +#ifdef OV_PROTOBUF_ABSL_IS_USED +# include +#else # include #endif @@ -594,7 +596,9 @@ PADDLE_C_API void* get_front_end_data() { #ifndef OPENVINO_DEBUG_ENABLE // disable protobuf logging -# ifndef OV_PROTOBUF_ABSL_IS_USED +# ifdef OV_PROTOBUF_ABSL_IS_USED + absl::SetGlobalVLogLevel(0); +# else google::protobuf::SetLogHandler(nullptr); # endif #endif diff --git a/src/frontends/pytorch/src/frontend.cpp b/src/frontends/pytorch/src/frontend.cpp index 5906043e51262d..2b0ab6db9d3a09 100644 --- a/src/frontends/pytorch/src/frontend.cpp +++ b/src/frontends/pytorch/src/frontend.cpp @@ -320,6 +320,7 @@ void FrontEnd::normalize(const std::shared_ptr& model) const { model->get_rt_info().erase("symmetric_quantization"); } manager.register_pass(sym); + manager.register_pass(); 
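// Editorial sketch, not part of the patch: the ONNX and Paddle frontend hunks above switch the
// protobuf log silencing on OV_PROTOBUF_ABSL_IS_USED. The include lines are truncated in those
// hunks, so the header paths below are assumptions (protobuf built against Abseil logging vs.
// the legacy stubs logging), and silence_protobuf_logging() is a hypothetical helper name.
#ifdef OV_PROTOBUF_ABSL_IS_USED
#    include <absl/log/globals.h>
#else
#    include <google/protobuf/stubs/logging.h>
#endif

static void silence_protobuf_logging() {
#ifndef OPENVINO_DEBUG_ENABLE
#    ifdef OV_PROTOBUF_ABSL_IS_USED
    absl::SetGlobalVLogLevel(0);  // Abseil-based protobuf: mute verbose logging
#    else
    google::protobuf::SetLogHandler(nullptr);  // classic protobuf: drop log messages
#    endif
#endif
}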
manager.register_pass(); manager.run_passes(model); diff --git a/src/frontends/pytorch/src/op/atan2.cpp b/src/frontends/pytorch/src/op/atan2.cpp new file mode 100644 index 00000000000000..341f1c201eae56 --- /dev/null +++ b/src/frontends/pytorch/src/op/atan2.cpp @@ -0,0 +1,99 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#define _USE_MATH_DEFINES + +#include + +#include + +#include "openvino/core/type/element_type.hpp" +#include "openvino/frontend/pytorch/node_context.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/atan.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/convert_like.hpp" +#include "openvino/op/divide.hpp" +#include "openvino/op/equal.hpp" +#include "openvino/op/greater.hpp" +#include "openvino/op/greater_eq.hpp" +#include "openvino/op/less.hpp" +#include "openvino/op/logical_and.hpp" +#include "openvino/op/logical_or.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/select.hpp" +#include "openvino/op/subtract.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace op { + +using namespace ov::op; + +OutputVector translate_atan2(const NodeContext& context) { + // atan2(input, other, *) → Tensor + num_inputs_check(context, 2, 2); + Output lhs; + Output rhs; + + std::tie(lhs, rhs) = get_inputs_with_promoted_types(context, 0, 1); + + auto div = context.mark_node(std::make_shared(lhs, rhs)); + + auto atan = context.mark_node(std::make_shared(div)); + + // create some constants to adjust result according to quadrant. + auto zero = context.mark_node(v0::Constant::create(ov::element::i32, Shape{}, {0})); + auto pi = context.mark_node(v0::Constant::create(ov::element::f64, Shape{}, {M_PI})); + auto half_pi = context.mark_node(v0::Constant::create(ov::element::f64, Shape{}, {M_PI_2})); + auto neg_half_pi = context.mark_node(v0::Constant::create(ov::element::f64, Shape{}, {-M_PI_2})); + + zero = context.mark_node(std::make_shared(zero, rhs)); + pi = context.mark_node(std::make_shared(pi, rhs)); + half_pi = context.mark_node(std::make_shared(half_pi, rhs)); + neg_half_pi = context.mark_node(std::make_shared(neg_half_pi, rhs)); + + // x > 0, no adjustment needed + auto x_greater_than_zero = context.mark_node(std::make_shared(rhs, zero)); + + // x < 0 and y >= 0, need to plus pi + auto y_greater_equal_zero = context.mark_node(std::make_shared(lhs, zero)); + auto x_less_than_zero = context.mark_node(std::make_shared(rhs, zero)); + auto add_pi_condition = context.mark_node(std::make_shared(x_less_than_zero, y_greater_equal_zero)); + + // x < 0 and y < 0, need to minus pi + auto y_less_than_zero = std::make_shared(lhs, zero); + auto subtract_pi_condition = + context.mark_node(std::make_shared(x_less_than_zero, y_less_than_zero)); + + // x = 0 and y > 0, pi/2 + auto x_equal_zero = std::make_shared(rhs, zero); + auto y_greater_than_zero = std::make_shared(lhs, zero); + auto half_pi_condition = context.mark_node(std::make_shared(x_equal_zero, y_greater_than_zero)); + + // x = 0 and y < 0, -pi/2 + auto neg_half_pi_condition = context.mark_node(std::make_shared(x_equal_zero, y_less_than_zero)); + + auto special_case_condition = + context.mark_node(std::make_shared(half_pi_condition, neg_half_pi_condition)); + + // do adjustment + auto atan_plus_pi = context.mark_node(std::make_shared(atan, pi)); + auto atan_minus_pi = context.mark_node(std::make_shared(atan, pi)); + + // select result + auto ajusted_case = context.mark_node(std::make_shared(add_pi_condition, 
atan_plus_pi, atan_minus_pi)); + auto special_case = context.mark_node(std::make_shared(half_pi_condition, half_pi, neg_half_pi)); + auto adjusted_atan = context.mark_node(std::make_shared(x_greater_than_zero, atan, ajusted_case)); + auto result = context.mark_node(std::make_shared(special_case_condition, special_case, adjusted_atan)); + + return {result}; +} + +} // namespace op +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/frontends/pytorch/src/op/cat.cpp b/src/frontends/pytorch/src/op/cat.cpp index 4ae2c4ebc81af4..d4f12cae258ad8 100644 --- a/src/frontends/pytorch/src/op/cat.cpp +++ b/src/frontends/pytorch/src/op/cat.cpp @@ -146,9 +146,17 @@ OutputVector translate_stack_fx(const NodeContext& context) { num_elements -= 1; } + OutputVector stack_inputs; for (size_t i = 0; i < num_elements; i++) { - auto stack_input = - context.mark_node(std::make_shared(context.get_input(static_cast(i)), dim)); + stack_inputs.push_back(context.get_input(static_cast(i))); + } + + // returns the u4 constant if the stack operation is a part of the decompression pattern + if (const auto& u4_const = u4_compression_stack(stack_inputs, axis)) + return {u4_const}; + + for (size_t i = 0; i < num_elements; i++) { + auto stack_input = context.mark_node(std::make_shared(stack_inputs[i], dim)); list_elems.push_back(stack_input); } return translate_cat_common(context, list_elems, axis, true); diff --git a/src/frontends/pytorch/src/op/lerp.cpp b/src/frontends/pytorch/src/op/lerp.cpp new file mode 100644 index 00000000000000..67922da3e4578d --- /dev/null +++ b/src/frontends/pytorch/src/op/lerp.cpp @@ -0,0 +1,36 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/frontend/pytorch/node_context.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/convert_like.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/subtract.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace op { + +using namespace ov::op; + +OutputVector translate_lerp(const NodeContext& context) { + // Tensor = aten::lerp(%lhs.1, %rhs.1, %self.weight) + num_inputs_check(context, 3, 3); + Output start; + Output end; + std::tie(start, end) = get_inputs_with_promoted_types(context, 0, 1); + + Output weight = context.get_input(2); + auto scale = context.mark_node(std::make_shared(end, start)); + weight = context.mark_node(std::make_shared(weight, scale)); + auto delta = context.mark_node(std::make_shared(scale, weight)); + return {context.mark_node(std::make_shared(start, delta))}; +}; + +} // namespace op +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/frontends/pytorch/src/op/search_sorted.cpp b/src/frontends/pytorch/src/op/search_sorted.cpp new file mode 100644 index 00000000000000..ca9f6b49ff7bf9 --- /dev/null +++ b/src/frontends/pytorch/src/op/search_sorted.cpp @@ -0,0 +1,34 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/op/search_sorted.hpp" + +#include "openvino/frontend/pytorch/node_context.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace op { + +using namespace ov::op; + +OutputVector translate_search_sorted(const NodeContext& context) { + num_inputs_check(context, 2, 5); + Output sorted; + Output values; + std::tie(sorted, values) = get_inputs_with_promoted_types(context, 0, 1); + const bool out_int32 = context.const_input(2); + 
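    // Editorial note, not part of the patch, on the aten::atan2 translation earlier in this
    // hunk: starting from atan(input / other), the graph applies the usual quadrant fixes.
    // A scalar reference of the intended math (y = input/lhs, x = other/rhs), assuming <cmath>
    // and the M_PI constants as in atan2.cpp above; the x == 0 && y == 0 corner is not
    // special-cased by the translation:
    const auto atan2_reference = [](double y, double x) {
        if (x > 0)            return std::atan(y / x);         // quadrants I/IV: no adjustment
        if (x < 0 && y >= 0)  return std::atan(y / x) + M_PI;  // quadrant II: add pi
        if (x < 0 && y < 0)   return std::atan(y / x) - M_PI;  // quadrant III: subtract pi
        return y > 0 ? M_PI_2 : -M_PI_2;                       // x == 0: +/- pi/2
    };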
PYTORCH_OP_CONVERSION_CHECK(out_int32 == false, "aten::searchsorted(out_int32=true) unsupported"); + const bool right_mode = context.const_input(3); + PYTORCH_OP_CONVERSION_CHECK(context.input_is_none(4), "aten::searchsorted(side) unsupported"); + PYTORCH_OP_CONVERSION_CHECK(context.input_is_none(5), "aten::searchsorted(out) unsupported"); + PYTORCH_OP_CONVERSION_CHECK(context.input_is_none(6), "aten::searchsorted(sorter) unsupported"); + auto op = context.mark_node(std::make_shared(sorted, values, right_mode)); + return {op}; +}; +} // namespace op +} // namespace pytorch +} // namespace frontend +} // namespace ov \ No newline at end of file diff --git a/src/frontends/pytorch/src/op/stft.cpp b/src/frontends/pytorch/src/op/stft.cpp new file mode 100644 index 00000000000000..b7e4858c2f8fcc --- /dev/null +++ b/src/frontends/pytorch/src/op/stft.cpp @@ -0,0 +1,93 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/op/stft.hpp" + +#include "openvino/frontend/pytorch/node_context.hpp" +#include "openvino/op/broadcast.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/convert_like.hpp" +#include "openvino/op/divide.hpp" +#include "openvino/op/shape_of.hpp" +#include "openvino/op/unsqueeze.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace op { + +using namespace ov::op; + +OutputVector translate_stft(const NodeContext& context) { + // schema: aten::stft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool + // normalized=False, bool? onesided=None, bool? return_complex=None) -> Tensor + // + // Note: aten::stft doesn't have "center" and "pad_mode" attrs like torch.stft, so the number of the inputs is lower + // and index of any input after the "window" is smaller accordingly + + num_inputs_check(context, 2, 8); + + auto input = context.get_input(0); + auto n_fft = context.get_input(1); + + ov::Output hop_length; + if (!context.input_is_none(2)) { + hop_length = context.get_input(2); + } else { + // Defualt floor(n_fft / 4) + const auto four = context.mark_node(std::make_shared(ov::element::i32, Shape{}, 4)); + const auto four_cast = context.mark_node(std::make_shared(four, n_fft)); + hop_length = context.mark_node(std::make_shared(n_fft, four_cast)); + } + + ov::Output win_length; + if (!context.input_is_none(3)) { + win_length = context.get_input(3); + } else { + win_length = n_fft; + } + + ov::Output window; + if (!context.input_is_none(4)) { + window = context.get_input(4); + } else { + const auto one = context.mark_node(std::make_shared(ov::element::i32, Shape{}, 1)); + const auto one_cast = context.mark_node(std::make_shared(one, input)); + const auto zero = context.mark_node(std::make_shared(ov::element::i32, Shape{1}, 0)); + const auto win_length_cast = + context.mark_node(std::make_shared(win_length, ov::element::i64)); + const auto win_len_vec = context.mark_node(std::make_shared(win_length_cast, zero)); + window = context.mark_node(std::make_shared(one_cast, win_len_vec)); + } + + bool normalized = false; + if (!context.input_is_none(5)) { + normalized = context.const_input(5); + } + PYTORCH_OP_CONVERSION_CHECK(!normalized, + "aten::stft conversion is currently supported with normalized=False only."); + + bool onesided = true; + if (!context.input_is_none(6)) { + onesided = context.const_input(6); + } + PYTORCH_OP_CONVERSION_CHECK(onesided, "aten::stft conversion is currently supported with onesided=True only."); + + bool 
diff --git a/src/frontends/pytorch/src/op/stft.cpp b/src/frontends/pytorch/src/op/stft.cpp
new file mode 100644
index 00000000000000..b7e4858c2f8fcc
--- /dev/null
+++ b/src/frontends/pytorch/src/op/stft.cpp
@@ -0,0 +1,93 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "openvino/op/stft.hpp"
+
+#include "openvino/frontend/pytorch/node_context.hpp"
+#include "openvino/op/broadcast.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/op/convert_like.hpp"
+#include "openvino/op/divide.hpp"
+#include "openvino/op/shape_of.hpp"
+#include "openvino/op/unsqueeze.hpp"
+#include "utils.hpp"
+
+namespace ov {
+namespace frontend {
+namespace pytorch {
+namespace op {
+
+using namespace ov::op;
+
+OutputVector translate_stft(const NodeContext& context) {
+    // schema: aten::stft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None,
+    //                    bool normalized=False, bool? onesided=None, bool? return_complex=None) -> Tensor
+    //
+    // Note: aten::stft doesn't have the "center" and "pad_mode" attributes of torch.stft, so it takes fewer inputs
+    // and the index of every input after "window" is shifted down accordingly.
+
+    num_inputs_check(context, 2, 8);
+
+    auto input = context.get_input(0);
+    auto n_fft = context.get_input(1);
+
+    ov::Output hop_length;
+    if (!context.input_is_none(2)) {
+        hop_length = context.get_input(2);
+    } else {
+        // Default: floor(n_fft / 4)
+        const auto four = context.mark_node(std::make_shared(ov::element::i32, Shape{}, 4));
+        const auto four_cast = context.mark_node(std::make_shared(four, n_fft));
+        hop_length = context.mark_node(std::make_shared(n_fft, four_cast));
+    }
+
+    ov::Output win_length;
+    if (!context.input_is_none(3)) {
+        win_length = context.get_input(3);
+    } else {
+        win_length = n_fft;
+    }
+
+    ov::Output window;
+    if (!context.input_is_none(4)) {
+        window = context.get_input(4);
+    } else {
+        const auto one = context.mark_node(std::make_shared(ov::element::i32, Shape{}, 1));
+        const auto one_cast = context.mark_node(std::make_shared(one, input));
+        const auto zero = context.mark_node(std::make_shared(ov::element::i32, Shape{1}, 0));
+        const auto win_length_cast =
+            context.mark_node(std::make_shared(win_length, ov::element::i64));
+        const auto win_len_vec = context.mark_node(std::make_shared(win_length_cast, zero));
+        window = context.mark_node(std::make_shared(one_cast, win_len_vec));
+    }
+
+    bool normalized = false;
+    if (!context.input_is_none(5)) {
+        normalized = context.const_input(5);
+    }
+    PYTORCH_OP_CONVERSION_CHECK(!normalized,
+                                "aten::stft conversion is currently supported with normalized=False only.");
+
+    bool onesided = true;
+    if (!context.input_is_none(6)) {
+        onesided = context.const_input(6);
+    }
+    PYTORCH_OP_CONVERSION_CHECK(onesided, "aten::stft conversion is currently supported with onesided=True only.");
+
+    bool return_complex = false;
+    if (!context.input_is_none(7)) {
+        return_complex = context.const_input(7);
+    }
+    PYTORCH_OP_CONVERSION_CHECK(!return_complex,
+                                "aten::stft conversion is currently supported with return_complex=False only.");
+
+    // Perform STFT
+    constexpr bool transpose_frames = true;
+    auto stft = context.mark_node(std::make_shared(input, window, n_fft, hop_length, transpose_frames));
+    return {stft};
+};
+}  // namespace op
+}  // namespace pytorch
+}  // namespace frontend
+}  // namespace ov
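
The defaults in translate_stft carry most of the logic: hop_length falls back to floor(n_fft / 4), and the window falls back to a rectangular window of ones of length win_length in the input's element type. The template arguments were stripped from this copy of the diff; assuming they are the usual opset classes (v0::Constant, v1::ConvertLike, v1::Divide, v0::Convert, v0::Unsqueeze, v3::Broadcast), an equivalent standalone sketch of those two defaults:

    // Sketch of the defaulting logic, with the (assumed) opset classes spelled out.
    #include <memory>
    #include "openvino/op/broadcast.hpp"
    #include "openvino/op/constant.hpp"
    #include "openvino/op/convert.hpp"
    #include "openvino/op/convert_like.hpp"
    #include "openvino/op/divide.hpp"
    #include "openvino/op/unsqueeze.hpp"

    ov::Output<ov::Node> default_hop_length(const ov::Output<ov::Node>& n_fft) {
        using namespace ov::op;
        // hop_length defaults to floor(n_fft / 4); for a positive integer n_fft,
        // integer Divide already truncates, which equals the floor here.
        auto four = v0::Constant::create(ov::element::i32, ov::Shape{}, {4});
        auto four_cast = std::make_shared<v1::ConvertLike>(four, n_fft);
        return std::make_shared<v1::Divide>(n_fft, four_cast);
    }

    ov::Output<ov::Node> default_window(const ov::Output<ov::Node>& win_length,
                                        const ov::Output<ov::Node>& signal_like) {
        using namespace ov::op;
        // The default window is rectangular: a vector of ones of length win_length,
        // cast to the same element type as the input signal.
        auto one = v0::Constant::create(ov::element::i32, ov::Shape{}, {1});
        auto one_cast = std::make_shared<v1::ConvertLike>(one, signal_like);
        auto zero = v0::Constant::create(ov::element::i32, ov::Shape{1}, {0});
        auto len_i64 = std::make_shared<v0::Convert>(win_length, ov::element::i64);
        auto len_vec = std::make_shared<v0::Unsqueeze>(len_i64, zero);
        return std::make_shared<v3::Broadcast>(one_cast, len_vec);
    }
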
diff --git a/src/frontends/pytorch/src/op_table.cpp b/src/frontends/pytorch/src/op_table.cpp
index 5d63a6dc037b14..607f0bd32db80d 100644
--- a/src/frontends/pytorch/src/op_table.cpp
+++ b/src/frontends/pytorch/src/op_table.cpp
@@ -42,6 +42,7 @@ OP_CONVERTER(translate_argmax);
 OP_CONVERTER(translate_argmin);
 OP_CONVERTER(translate_as_strided);
 OP_CONVERTER(translate_as_tensor);
+OP_CONVERTER(translate_atan2);
 OP_CONVERTER(translate_avg_pool1d);
 OP_CONVERTER(translate_avg_pool2d);
 OP_CONVERTER(translate_avg_pool3d);
@@ -120,6 +121,7 @@ OP_CONVERTER(translate_inverse);
 OP_CONVERTER(translate_is_nonzero);
 OP_CONVERTER(translate_layer_norm);
 OP_CONVERTER(translate_len);
+OP_CONVERTER(translate_lerp);
 OP_CONVERTER(translate_linalg_cross);
 OP_CONVERTER(translate_linalg_norm);
 OP_CONVERTER(translate_linalg_matrix_norm);
@@ -219,6 +221,7 @@ OP_CONVERTER(translate_square);
 OP_CONVERTER(translate_squeeze);
 OP_CONVERTER(translate_std);
 OP_CONVERTER(translate_std_mean);
+OP_CONVERTER(translate_stft);
 OP_CONVERTER(translate_sub);
 OP_CONVERTER(translate_sub_);
 OP_CONVERTER(translate_sum);
@@ -299,6 +302,7 @@ OP_CONVERTER(translate_reshape_fx);
 OP_CONVERTER(translate_rsub_fx);
 OP_CONVERTER(translate_scalar_tensor_fx);
 OP_CONVERTER(translate_scaled_dot_product_attention_fx);
+OP_CONVERTER(translate_search_sorted);
 OP_CONVERTER(translate_select_scatter_fx);
 OP_CONVERTER(translate_slice_fx);
 OP_CONVERTER(translate_slice_scatter_fx);
@@ -327,12 +331,15 @@ OP_CONVERTER(translate_zeros_like_fx);
 const std::unordered_map get_supported_ops_ts() {
     return {
         {"aten::__and__", op::translate_bitwise_and},
+        {"aten::__iand__", op::inplace_op},
         {"aten::__derive_index", op::translate_derive_index},
         {"aten::__getitem__", op::translate_getitem},
         {"aten::__not__", op::translate_1to1_match_1_inputs},
         {"aten::__or__", op::translate_bitwise_or},
+        {"aten::__ior__", op::inplace_op},
         {"aten::__range_length", op::translate_range_length},
         {"aten::__xor__", op::translate_bitwise_xor},
+        {"aten::__ixor__", op::inplace_op},
         {"aten::_convolution", op::translate_convolution},
         {"aten::_convolution_mode", op::translate_convolution_mode},
         {"aten::_native_multi_head_attention", op::translate_native_multi_head_attention},
@@ -385,6 +392,7 @@ const std::unordered_map get_supported_ops_ts() {
         {"aten::atanh", op::optional_out, 1>},
         {"aten::atanh_", op::inplace_op>},
+        {"aten::atan2", op::translate_atan2},
         {"aten::avg_pool1d", op::quantizable_op},
         {"aten::avg_pool2d", op::quantizable_op},
         {"aten::avg_pool3d", op::quantizable_op},
@@ -505,6 +513,7 @@ const std::unordered_map get_supported_ops_ts() {
         {"aten::le", op::translate_1to1_match_2_inputs_align_types},
         {"aten::leaky_relu", op::translate_1to1_match_2_inputs},
         {"aten::len", op::translate_len},
+        {"aten::lerp", op::translate_lerp},
         // lift op is torchscript specific op responsible for tensors coping with guarantee of new memory allocation
         {"aten::lift", op::skip_node},
         {"aten::lift_fresh", op::skip_node},
@@ -615,6 +624,7 @@ const std::unordered_map get_supported_ops_ts() {
         {"aten::rsqrt", op::optional_out},
         {"aten::rsqrt_", op::inplace_op},
         {"aten::rsub", op::translate_rsub},
+        {"aten::searchsorted", op::translate_search_sorted},
         {"aten::ScalarImplicit", op::skip_node},
         {"aten::scaled_dot_product_attention", op::translate_scaled_dot_product_attention},
         {"aten::scatter", op::translate_scatter},
@@ -645,6 +655,7 @@ const std::unordered_map get_supported_ops_ts() {
         // aten::stack - Supported in limited set of patterns
         {"aten::std", op::translate_std},
         {"aten::std_mean", op::translate_std_mean},
+        {"aten::stft", op::translate_stft},
         {"aten::sub", op::translate_sub},
         {"aten::sub_", op::translate_sub_},
         {"aten::sum", op::translate_sum},
@@ -742,6 +753,7 @@ const std::unordered_map get_supported_ops_fx() {
         {"aten._native_batch_norm_legit.no_stats", op::translate_batch_norm_legit_no_stats_fx},
         {"aten._native_batch_norm_legit_functional.default", op::translate_batch_norm_legit_fx},
         {"aten._native_batch_norm_legit_no_training.default", op::translate_batch_norm_legit_no_training_fx},
+        {"aten._safe_softmax.default", op::translate_softmax_fx},
         {"aten._scaled_dot_product_flash_attention.default", op::translate_scaled_dot_product_attention_fx},
         {"aten._scaled_dot_product_flash_attention_for_cpu.default", op::translate_scaled_dot_product_attention_fx},
         {"aten._softmax.default", op::translate_softmax_fx},
@@ -776,6 +788,7 @@ const std::unordered_map get_supported_ops_fx() {
         {"aten.asinh.default", op::translate_1to1_match_1_inputs_with_fp32_type_alignment},
         {"aten.atan.default", op::translate_1to1_match_1_inputs_with_fp32_type_alignment},
         {"aten.atanh.default", op::translate_1to1_match_1_inputs_with_fp32_type_alignment},
+        {"aten.atan2.default", op::translate_atan2},
         {"aten.avg_pool2d.default", op::translate_avg_pool2d},
         {"aten.avg_pool3d.default", op::translate_avg_pool3d},
         {"aten.baddbmm.default", op::translate_addmm_fx},
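
Among the new TorchScript entries, aten::lerp is plain arithmetic: lerp(start, end, weight) = start + weight * (end - start). For reference, that decomposition can be written directly with core ops as below; this only illustrates the math and is not necessarily what translate_lerp does (the real converter likely also aligns input types):

    // Illustrative decomposition of lerp(start, end, weight).
    #include <memory>
    #include "openvino/op/add.hpp"
    #include "openvino/op/multiply.hpp"
    #include "openvino/op/subtract.hpp"

    ov::Output<ov::Node> lerp(const ov::Output<ov::Node>& start,
                              const ov::Output<ov::Node>& end,
                              const ov::Output<ov::Node>& weight) {
        using namespace ov::op;
        auto delta = std::make_shared<v1::Subtract>(end, start);    // end - start
        auto scaled = std::make_shared<v1::Multiply>(weight, delta);  // weight * (end - start)
        return std::make_shared<v1::Add>(start, scaled);              // start + ...
    }
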
diff --git a/src/frontends/pytorch/src/transforms/u4_block_repack.cpp b/src/frontends/pytorch/src/transforms/u4_block_repack.cpp
index 5130424d0c60ed..797fa531c43b60 100644
--- a/src/frontends/pytorch/src/transforms/u4_block_repack.cpp
+++ b/src/frontends/pytorch/src/transforms/u4_block_repack.cpp
@@ -6,10 +6,12 @@
 #include "openvino/core/rt_info.hpp"
 #include "openvino/op/constant.hpp"
+#include "openvino/op/multiply.hpp"
 #include "openvino/op/reshape.hpp"
 #include "openvino/op/subtract.hpp"
 #include "openvino/op/transpose.hpp"
 #include "openvino/pass/pattern/matcher.hpp"
+#include "openvino/pass/pattern/op/or.hpp"
 #include "openvino/pass/pattern/op/wrap_type.hpp"
 #include "utils.hpp"
 #include "utils_quantize.hpp"
@@ -122,6 +124,79 @@ U4BlockRepack::U4BlockRepack(bool is_symmetrical) {
         });
 };
 
+U4ConvertReshape::U4ConvertReshape() {
+    const auto& m_constant = wrap_type(type_matches(element::u4));
+    const auto& m_convert = wrap_type({m_constant});
+
+    const auto& m_constant_1 = wrap_type();
+    const auto& m_convert_1 = wrap_type({m_constant_1});
+    const auto& m_constant_8 = wrap_type();
+    const auto& m_convert_8 = wrap_type({m_constant_8});
+    const auto& m_multiply = wrap_type({m_convert_8, m_convert_1});
+
+    const auto& m_converted_constant_8 =
+        std::make_shared(ov::OutputVector{m_multiply, m_convert_8});
+    const auto& m_subtract = wrap_type({m_convert, m_converted_constant_8});
+    const auto& m_converted_constant =
+        std::make_shared(ov::OutputVector{m_subtract, m_constant});
+    const auto& m_reshape = wrap_type({m_converted_constant, any_input()});
+
+    register_matcher(
+        std::make_shared(m_reshape, "ov::frontend::pytorch::pass::U4ConvertReshape"),
+        [=](Matcher& m) {
+            auto& pattern_to_output = m.get_pattern_value_map();
+            auto u4_const =
+                std::dynamic_pointer_cast(pattern_to_output[m_constant].get_node_shared_ptr());
+            if (!u4_const)
+                return false;
+
+            if (u4_const->get_element_type() != element::u4)
+                return false;
+
+            auto reshape = pattern_to_output[m_reshape].get_node_shared_ptr();
+            auto dst_shape = reshape->get_output_shape(0);
+
+            std::shared_ptr new_const;
+            if (pattern_to_output.count(m_constant_8)) {
+                auto constant_8 = std::dynamic_pointer_cast(
+                    pattern_to_output[m_constant_8].get_node_shared_ptr());
+                if (ov::shape_size(constant_8->get_output_shape(0)) != 1 ||
+                    constant_8->get_output_element_type(0).is_real() || constant_8->cast_vector()[0] != 8)
+                    return false;
+
+                if (pattern_to_output.count(m_constant_1)) {
+                    auto constant_1 = std::dynamic_pointer_cast(
+                        pattern_to_output[m_constant_1].get_node_shared_ptr());
+                    if (ov::shape_size(constant_1->get_output_shape(0)) != 1 ||
+                        constant_1->get_output_element_type(0).is_real() || constant_1->cast_vector()[0] != 1)
+                        return false;
+                }
+
+                new_const = std::make_shared(element::i4, dst_shape);
+                auto dst = const_cast(reinterpret_cast(new_const->get_data_ptr()));
+
+                auto src = u4_const->get_data_ptr();
+                auto num_elements = ov::shape_size(u4_const->get_output_shape(0));
+
+                for (size_t i = 0; i < num_elements / 2; ++i) {
+                    // subtract 8 from two packed 4-bit elements at once by flipping the high bit of each nibble
+                    dst[i] = src[i] ^ 0b10001000;
+                }
+            } else {
+                new_const = std::make_shared(*u4_const, dst_shape);
+            }
+
+            NodeVector pattern_nodes;
+            for (auto const& iout : pattern_to_output)
+                pattern_nodes.push_back(std::move(iout.first));
+
+            copy_runtime_info(pattern_nodes, new_const);
+            replace_node(reshape, new_const);
+
+            return true;
+        });
+};
+
 }  // namespace pass
 }  // namespace pytorch
 }  // namespace frontend
diff --git a/src/frontends/pytorch/src/transforms/u4_block_repack.hpp b/src/frontends/pytorch/src/transforms/u4_block_repack.hpp
index 6ab65a5d1c3838..99742ff148813a 100644
--- a/src/frontends/pytorch/src/transforms/u4_block_repack.hpp
+++ b/src/frontends/pytorch/src/transforms/u4_block_repack.hpp
@@ -18,6 +18,12 @@ class U4BlockRepack : public ov::pass::MatcherPass {
     U4BlockRepack(bool is_symmetrical = false);
 };
 
+class U4ConvertReshape : public ov::pass::MatcherPass {
+public:
+    OPENVINO_RTTI("ov::frontend::pytorch::pass::U4ConvertReshape");
+    U4ConvertReshape();
+};
+
 }  // namespace pass
 }  // namespace pytorch
 }  // namespace frontend
diff --git a/src/frontends/pytorch/src/utils_quantize.cpp b/src/frontends/pytorch/src/utils_quantize.cpp
index e48c61314f4c0d..ad8f91bcda25a2 100644
--- a/src/frontends/pytorch/src/utils_quantize.cpp
+++ b/src/frontends/pytorch/src/utils_quantize.cpp
@@ -237,12 +237,15 @@ std::shared_ptr u4_compression_stack(const OutputVector& list_elems, int64
         return nullptr;
     }
 
-    auto bitwise_shift = cast_fw_node(list_elems[1].get_node_shared_ptr(), "aten::bitwise_right_shift");
+    auto bitwise_shift = cast_fw_node(list_elems[1].get_node_shared_ptr(),
+                                      {"aten::bitwise_right_shift", "aten.bitwise_right_shift.Tensor_Scalar"});
     if (!bitwise_shift)
         return nullptr;
 
     auto weights_u8 = std::dynamic_pointer_cast(bitwise_and->get_input_node_shared_ptr(0));
-    if (weights_u8 != std::dynamic_pointer_cast(bitwise_shift->get_input_node_shared_ptr(0)))
+    auto weights_u8_bitwise_shift =
+        std::dynamic_pointer_cast(bitwise_shift->get_input_node_shared_ptr(0));
+    if (weights_u8->get_data_ptr() != weights_u8_bitwise_shift->get_data_ptr())
         return nullptr;
 
     if (weights_u8->get_output_element_type(0) != element::u8)
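
The one-byte XOR in U4ConvertReshape works because two u4 values are packed per byte, and subtracting 8 from a 4-bit value modulo 16 is exactly a flip of its high bit; that flip is also how u4 values 0..15 map onto two's-complement i4 values -8..7. A standalone check of that identity:

    // Verify: (packed ^ 0b1000'1000) subtracts 8 (mod 16) from both packed nibbles.
    #include <cassert>
    #include <cstdint>

    int main() {
        for (unsigned lo = 0; lo < 16; ++lo) {
            for (unsigned hi = 0; hi < 16; ++hi) {
                uint8_t packed = static_cast<uint8_t>((hi << 4) | lo);
                uint8_t repacked = packed ^ 0b10001000;
                // Each result nibble equals (original - 8) mod 16, i.e. the
                // two's-complement i4 encoding of (original - 8).
                assert((repacked & 0x0F) == ((lo + 8) & 0x0F));
                assert(((repacked >> 4) & 0x0F) == ((hi + 8) & 0x0F));
            }
        }
        return 0;
    }
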
diff --git a/src/frontends/tensorflow/src/transformations/switch_merge_resolve.cpp b/src/frontends/tensorflow/src/transformations/switch_merge_resolve.cpp
index 34b2a82152ccfc..cbdc506671aa67 100644
--- a/src/frontends/tensorflow/src/transformations/switch_merge_resolve.cpp
+++ b/src/frontends/tensorflow/src/transformations/switch_merge_resolve.cpp
@@ -235,6 +235,9 @@ bool pass::SwitchMergeResolver::run_on_model(const shared_ptr& m) {
     auto else_body = make_shared(else_results, else_params);
     auto if_op = make_shared(cond);
 
+    // in case of TensorFlow models, we can deduce that the predicate shape must be a scalar
+    if_op->get_rt_info()["tf_switch_merge_if"] = true;
+
     set_cf_marker(if_cf_marker, if_op);
     if_op->set_then_body(then_body);
     if_op->set_else_body(else_body);
diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp
index 627314748bbe9c..5674c75dd546d7 100644
--- a/src/inference/include/openvino/runtime/properties.hpp
+++ b/src/inference/include/openvino/runtime/properties.hpp
@@ -580,6 +580,12 @@ static constexpr Property dynamic_quantization
  */
 static constexpr Property kv_cache_precision{"KV_CACHE_PRECISION"};
 
+/**
+ * @brief This property scales down activations to prevent overflows when inference precision is f16.
+ * @ingroup ov_runtime_cpp_prop_api
+ */
+static constexpr Property activations_scale_factor{"ACTIVATIONS_SCALE_FACTOR"};
+
 }  // namespace hint
 
 /**
diff --git a/src/plugins/auto/src/auto_schedule.cpp b/src/plugins/auto/src/auto_schedule.cpp
index ea5f2159179824..c504e8e4457870 100644
--- a/src/plugins/auto/src/auto_schedule.cpp
+++ b/src/plugins/auto/src/auto_schedule.cpp
@@ -239,8 +239,7 @@ void AutoSchedule::init() {
     std::pair worker;
     std::list
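
For the new ov::hint::activations_scale_factor property above, the value type of the Property was lost in this copy of the diff; assuming it is a float scale factor, a minimal usage sketch (the model path and the value 8.0f are hypothetical) could look like:

    #include "openvino/openvino.hpp"

    int main() {
        ov::Core core;
        auto model = core.read_model("model.xml");  // hypothetical model path
        // Ask the plugin to scale down activations when running with f16 inference precision.
        auto compiled = core.compile_model(model,
                                           "GPU",
                                           ov::hint::inference_precision(ov::element::f16),
                                           ov::hint::activations_scale_factor(8.0f));
        return 0;
    }
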