diff --git a/.ci/docker/ubuntu/Dockerfile b/.ci/docker/ubuntu/Dockerfile index ba276c29..deb69a50 100644 --- a/.ci/docker/ubuntu/Dockerfile +++ b/.ci/docker/ubuntu/Dockerfile @@ -1,6 +1,6 @@ ARG OS_VERSION -FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu${OS_VERSION} +FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu${OS_VERSION} ARG OS_VERSION diff --git a/.github/workflows/integration_test_4gpu.yaml b/.github/workflows/integration_test_4gpu.yaml index 72cdb8af..6c506887 100644 --- a/.github/workflows/integration_test_4gpu.yaml +++ b/.github/workflows/integration_test_4gpu.yaml @@ -37,7 +37,7 @@ jobs: pip config --user set global.progress_bar off - python -m pip install --force-reinstall --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121 + python -m pip install --force-reinstall --pre torch --index-url https://download.pytorch.org/whl/nightly/cu124 # install torchtitan to test the files in ./scripts, currently just for memory estimation python -m pip install -e . diff --git a/.github/workflows/integration_test_8gpu.yaml b/.github/workflows/integration_test_8gpu.yaml index 0d8c79db..0b8f2a1f 100644 --- a/.github/workflows/integration_test_8gpu.yaml +++ b/.github/workflows/integration_test_8gpu.yaml @@ -36,6 +36,6 @@ jobs: pip config --user set global.progress_bar off - python -m pip install --force-reinstall --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121 + python -m pip install --force-reinstall --pre torch --index-url https://download.pytorch.org/whl/nightly/cu124 mkdir artifacts-to-be-uploaded python ./test_runner.py artifacts-to-be-uploaded --ngpu 8