From 85bd6bc0105162293fa0bbfb7b661f85ec67f85a Mon Sep 17 00:00:00 2001 From: Huy Do Date: Mon, 1 May 2023 19:31:37 +0000 Subject: [PATCH] Cache pretrained mobilenet_v2 and mobilenet_v3_large models in Docker (#100302) Following the example I did for ONNX in https://github.com/pytorch/pytorch/pull/96793, this caches the pretrained `mobilenet_v2` and `mobilenet_v3_large` models used by CI jobs. I think there might be an issue either with AWS or with the domain download.pytorch.org as the connection to the latter has been failing a lot in the past few days. Related flaky jobs: * https://github.com/pytorch/pytorch/actions/runs/4835873487/jobs/8618836446 * https://github.com/pytorch/pytorch/actions/runs/4835783539/jobs/8618404639 * https://github.com/pytorch/pytorch/actions/runs/4835783539/jobs/8618404639 ``` Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /var/lib/jenkins/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth Traceback (most recent call last): File "/opt/conda/envs/py_3.8/lib/python3.8/urllib/request.py", line 1354, in do_open h.request(req.get_method(), req.selector, req.data, headers, File "/opt/conda/envs/py_3.8/lib/python3.8/http/client.py", line 1256, in request self._send_request(method, url, body, headers, encode_chunked) File "/opt/conda/envs/py_3.8/lib/python3.8/http/client.py", line 1302, in _send_request self.endheaders(body, encode_chunked=encode_chunked) File "/opt/conda/envs/py_3.8/lib/python3.8/http/client.py", line 1251, in endheaders self._send_output(message_body, encode_chunked=encode_chunked) File "/opt/conda/envs/py_3.8/lib/python3.8/http/client.py", line 1011, in _send_output self.send(msg) File "/opt/conda/envs/py_3.8/lib/python3.8/http/client.py", line 951, in send self.connect() File "/opt/conda/envs/py_3.8/lib/python3.8/http/client.py", line 1418, in connect super().connect() File "/opt/conda/envs/py_3.8/lib/python3.8/http/client.py", line 922, in connect self.sock = self._create_connection( File 
"/opt/conda/envs/py_3.8/lib/python3.8/socket.py", line 808, in create_connection raise err File "/opt/conda/envs/py_3.8/lib/python3.8/socket.py", line 796, in create_connection sock.connect(sa) OSError: [Errno 99] Cannot assign requested address ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/100302 Approved by: https://github.com/ZainRizvi --- .ci/docker/centos-rocm/Dockerfile | 4 ++-- .ci/docker/common/cache_vision_models.sh | 18 ++++++++++++++++++ .ci/docker/common/install_android.sh | 3 +++ .ci/docker/common/install_vision.sh | 3 +++ .ci/docker/ubuntu-cuda/Dockerfile | 4 ++-- .ci/docker/ubuntu-rocm/Dockerfile | 4 ++-- .ci/docker/ubuntu/Dockerfile | 8 ++++---- 7 files changed, 34 insertions(+), 10 deletions(-) create mode 100644 .ci/docker/common/cache_vision_models.sh diff --git a/.ci/docker/centos-rocm/Dockerfile b/.ci/docker/centos-rocm/Dockerfile index 3bd2ff66df330..4cdc96129c344 100644 --- a/.ci/docker/centos-rocm/Dockerfile +++ b/.ci/docker/centos-rocm/Dockerfile @@ -64,9 +64,9 @@ ENV INSTALLED_DB ${DB} # (optional) Install vision packages like OpenCV and ffmpeg ARG VISION -COPY ./common/install_vision.sh install_vision.sh +COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./ RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi -RUN rm install_vision.sh +RUN rm install_vision.sh cache_vision_models.sh common_utils.sh ENV INSTALLED_VISION ${VISION} # Install rocm diff --git a/.ci/docker/common/cache_vision_models.sh b/.ci/docker/common/cache_vision_models.sh new file mode 100644 index 0000000000000..136f968705bfb --- /dev/null +++ b/.ci/docker/common/cache_vision_models.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +set -ex + +source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh" + +# Cache the test models at ~/.cache/torch/hub/ +IMPORT_SCRIPT_FILENAME="/tmp/torchvision_import_script.py" +as_jenkins echo 'import torchvision; torchvision.models.mobilenet_v2(pretrained=True); 
torchvision.models.mobilenet_v3_large(pretrained=True);' > "${IMPORT_SCRIPT_FILENAME}" + +pip_install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cpu +# Very weird quoting behavior here https://github.com/conda/conda/issues/10972, +# so echo the command to a file and run the file instead +conda_run python "${IMPORT_SCRIPT_FILENAME}" + +# Cleaning up +conda_run pip uninstall -y torch torchvision +rm "${IMPORT_SCRIPT_FILENAME}" || true diff --git a/.ci/docker/common/install_android.sh b/.ci/docker/common/install_android.sh index 6e8d5ee9fa739..04d920b14fec4 100755 --- a/.ci/docker/common/install_android.sh +++ b/.ci/docker/common/install_android.sh @@ -107,3 +107,6 @@ chgrp -R jenkins /var/lib/jenkins/.gradle popd rm -rf /var/lib/jenkins/.gradle/daemon + +# Cache vision models used by the test +source "$(dirname "${BASH_SOURCE[0]}")/cache_vision_models.sh" diff --git a/.ci/docker/common/install_vision.sh b/.ci/docker/common/install_vision.sh index 09428a2f60f9e..faa190ad1e206 100755 --- a/.ci/docker/common/install_vision.sh +++ b/.ci/docker/common/install_vision.sh @@ -43,3 +43,6 @@ case "$ID" in exit 1 ;; esac + +# Cache vision models used by the test +source "$(dirname "${BASH_SOURCE[0]}")/cache_vision_models.sh" diff --git a/.ci/docker/ubuntu-cuda/Dockerfile b/.ci/docker/ubuntu-cuda/Dockerfile index c4b2a030bfd72..234ca21b07a91 100644 --- a/.ci/docker/ubuntu-cuda/Dockerfile +++ b/.ci/docker/ubuntu-cuda/Dockerfile @@ -58,9 +58,9 @@ ENV INSTALLED_DB ${DB} # (optional) Install vision packages like OpenCV and ffmpeg ARG VISION -COPY ./common/install_vision.sh install_vision.sh +COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./ RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi -RUN rm install_vision.sh +RUN rm install_vision.sh cache_vision_models.sh common_utils.sh ENV INSTALLED_VISION ${VISION} # (optional) Install UCC diff --git a/.ci/docker/ubuntu-rocm/Dockerfile 
b/.ci/docker/ubuntu-rocm/Dockerfile index fe8d8e0f531a7..aefa3a5b7501f 100644 --- a/.ci/docker/ubuntu-rocm/Dockerfile +++ b/.ci/docker/ubuntu-rocm/Dockerfile @@ -55,9 +55,9 @@ ENV INSTALLED_DB ${DB} # (optional) Install vision packages like OpenCV and ffmpeg ARG VISION -COPY ./common/install_vision.sh install_vision.sh +COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./ RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi -RUN rm install_vision.sh +RUN rm install_vision.sh cache_vision_models.sh common_utils.sh ENV INSTALLED_VISION ${VISION} # Install rocm diff --git a/.ci/docker/ubuntu/Dockerfile b/.ci/docker/ubuntu/Dockerfile index b70431c70dc04..4f3cb2184f9eb 100644 --- a/.ci/docker/ubuntu/Dockerfile +++ b/.ci/docker/ubuntu/Dockerfile @@ -86,20 +86,20 @@ ENV INSTALLED_DB ${DB} # (optional) Install vision packages like OpenCV and ffmpeg ARG VISION -COPY ./common/install_vision.sh install_vision.sh +COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./ RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi -RUN rm install_vision.sh +RUN rm install_vision.sh cache_vision_models.sh common_utils.sh ENV INSTALLED_VISION ${VISION} # (optional) Install Android NDK ARG ANDROID ARG ANDROID_NDK ARG GRADLE_VERSION -COPY ./common/install_android.sh install_android.sh +COPY ./common/install_android.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./ COPY ./android/AndroidManifest.xml AndroidManifest.xml COPY ./android/build.gradle build.gradle RUN if [ -n "${ANDROID}" ]; then bash ./install_android.sh; fi -RUN rm install_android.sh +RUN rm install_android.sh cache_vision_models.sh common_utils.sh RUN rm AndroidManifest.xml RUN rm build.gradle ENV INSTALLED_ANDROID ${ANDROID}