From 54348ff036d19a68c35cb0086c05b74580c472e4 Mon Sep 17 00:00:00 2001 From: ericharper Date: Fri, 3 Jun 2022 14:52:01 -0600 Subject: [PATCH 1/7] update container to 22.05 Signed-off-by: ericharper --- Dockerfile | 2 +- Jenkinsfile | 2 +- README.rst | 6 +++--- ci.groovy | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index 2e41f58e3887..4011453a4c6b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:22.04-py3 +ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:22.05-py3 # build an image that includes only the nemo dependencies, ensures that dependencies diff --git a/Jenkinsfile b/Jenkinsfile index 5125567884b4..20a4394e1abd 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,7 +1,7 @@ pipeline { agent { docker { - image 'nvcr.io/nvidia/pytorch:22.04-py3' + image 'nvcr.io/nvidia/pytorch:22.05-py3' args '--device=/dev/nvidia0 --gpus all -e TRANSFORMERS_OFFLINE=1 --user 0:128 -v /home/TestData:/home/TestData -v $HOME/.cache:/root/.cache --shm-size=8g' } } diff --git a/README.rst b/README.rst index 5696519463cd..f7bb9c05c28d 100644 --- a/README.rst +++ b/README.rst @@ -202,7 +202,7 @@ Megatron GPT training requires NVIDIA Apex to be installed. git clone https://github.com/NVIDIA/apex cd apex - git checkout 9263bc8c6c16555bd55dd759f1a1b8c0cd187d10 + git checkout 5d8c8a8eedaf567d56f0762a45431baf9c0e800e pip install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" --global-option="--fast_layer_norm" ./ Docker containers: @@ -214,13 +214,13 @@ To build a nemo container with Dockerfile from a branch, please run DOCKER_BUILDKIT=1 docker build -f Dockerfile -t nemo:latest . -If you chose to work with main branch, we recommend using NVIDIA's PyTorch container version 22.04-py3 and then installing from GitHub. +If you chose to work with main branch, we recommend using NVIDIA's PyTorch container version 22.05-py3 and then installing from GitHub. .. code-block:: bash docker run --gpus all -it --rm -v :/NeMo --shm-size=8g \ -p 8888:8888 -p 6006:6006 --ulimit memlock=-1 --ulimit \ - stack=67108864 --device=/dev/snd nvcr.io/nvidia/pytorch:22.04-py3 + stack=67108864 --device=/dev/snd nvcr.io/nvidia/pytorch:22.05-py3 Examples -------- diff --git a/ci.groovy b/ci.groovy index 34ad0dd5dc69..fbb6392ba8a8 100644 --- a/ci.groovy +++ b/ci.groovy @@ -15,7 +15,7 @@ spec: path: /vol/scratch1/scratch.okuchaiev_blossom containers: - name: cuda - image: nvcr.io/nvidia/pytorch:22.04-py3 + image: nvcr.io/nvidia/pytorch:22.05-py3 command: - cat volumeMounts: From 158a0df91032364d25c9a38d6836f7a9101b087c Mon Sep 17 00:00:00 2001 From: ericharper Date: Mon, 6 Jun 2022 15:17:02 -0600 Subject: [PATCH 2/7] try adding safe directory Signed-off-by: ericharper --- Jenkinsfile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Jenkinsfile b/Jenkinsfile index 20a4394e1abd..e944018058cb 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -12,6 +12,12 @@ pipeline { stages { + stage('Add git safe directory'){ + steps{ + sh 'git config --global --add safe.directory /var/lib/jenkins/workspace' + } + } + stage('nvidia-smi'){ steps{ sh 'nvidia-smi' From 5174c36915db7d10f5bc608ba920061acd1f01e1 Mon Sep 17 00:00:00 2001 From: ericharper Date: Mon, 6 Jun 2022 15:32:26 -0600 Subject: [PATCH 3/7] try env var Signed-off-by: ericharper --- Jenkinsfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index e944018058cb..53bcf8dcbbbe 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -14,7 +14,8 @@ pipeline { stage('Add git safe directory'){ steps{ - sh 'git config --global --add safe.directory /var/lib/jenkins/workspace' + sh 'git config --global --add safe.directory $GIT_CHECKOUT_DIR' + sh 'cat ~/.gitconfig' } } From 32731da90608647c85a35bd586d108b0cf7d8598 Mon Sep 17 00:00:00 2001 From: ericharper Date: Mon, 6 Jun 2022 15:36:17 -0600 Subject: [PATCH 4/7] printenv Signed-off-by: ericharper --- Jenkinsfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Jenkinsfile b/Jenkinsfile index 53bcf8dcbbbe..8fcdea347d5b 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -14,6 +14,7 @@ pipeline { stage('Add git safe directory'){ steps{ + sh 'printenv' sh 'git config --global --add safe.directory $GIT_CHECKOUT_DIR' sh 'cat ~/.gitconfig' } From 2059878ce70071926c3704f57ad5ed2d78098da0 Mon Sep 17 00:00:00 2001 From: ericharper Date: Mon, 6 Jun 2022 15:43:33 -0600 Subject: [PATCH 5/7] try GIT_BRANCH Signed-off-by: ericharper --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 8fcdea347d5b..779c82808930 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -15,7 +15,7 @@ pipeline { stage('Add git safe directory'){ steps{ sh 'printenv' - sh 'git config --global --add safe.directory $GIT_CHECKOUT_DIR' + sh 'git config --global --add safe.directory /var/lib/jenkins/workspace/NeMo_$GIT_BRANCH sh 'cat ~/.gitconfig' } } From d345b8e9159d89447890602b0b32b9ae747eb806 Mon Sep 17 00:00:00 2001 From: ericharper Date: Mon, 6 Jun 2022 16:09:24 -0600 Subject: [PATCH 6/7] typo Signed-off-by: ericharper --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 779c82808930..119f6e8a50b8 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -15,7 +15,7 @@ pipeline { stage('Add git safe directory'){ steps{ sh 'printenv' - sh 'git config --global --add safe.directory /var/lib/jenkins/workspace/NeMo_$GIT_BRANCH + sh 'git config --global --add safe.directory /var/lib/jenkins/workspace/NeMo_$GIT_BRANCH' sh 'cat ~/.gitconfig' } } From 8e535fa60ce3a472d11eecba4a5fa2cc52f63d08 Mon Sep 17 00:00:00 2001 From: ericharper Date: Mon, 6 Jun 2022 16:35:08 -0600 Subject: [PATCH 7/7] remove dbug statements Signed-off-by: ericharper --- Jenkinsfile | 2 -- 1 file changed, 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 119f6e8a50b8..e4f2f47ffea1 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -14,9 +14,7 @@ pipeline { stage('Add git safe directory'){ steps{ - sh 'printenv' sh 'git config --global --add safe.directory /var/lib/jenkins/workspace/NeMo_$GIT_BRANCH' - sh 'cat ~/.gitconfig' } }