Skip to content

Commit

Permalink
Reduce storage usage in favor of local storage (#10767)
Browse files: browse the repository at this point in the history.
* readonly CI mounts

Signed-off-by: Alexandros Koumparoulis <akoumparouli@nvidia.com>

* use /tmp/ instead of TestData for writing outputs

Signed-off-by: Alexandros Koumparoulis <akoumparouli@nvidia.com>

* fix path

Signed-off-by: Alexandros Koumparoulis <akoumparouli@nvidia.com>

* attempt: don't use :ro

* revert for failing test

* revert for failing test

* revert back

* revert back

* revert back

* revert back

* revert back

* revert back

* Mark flaky test as optional

---------

Signed-off-by: Alexandros Koumparoulis <akoumparouli@nvidia.com>
Co-authored-by: Alexandros Koumparoulis <akoumparouli@nvidia.com>
  • Loading branch information
pablo-garay and akoumpa authored Oct 9, 2024
1 parent c3bfcd7 commit 5e22a30
Showing 1 changed file with 38 additions and 84 deletions.
122 changes: 38 additions & 84 deletions .github/workflows/cicd-main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -249,15 +249,16 @@ jobs:
--ignore=tests/utils
# L0: CPU unit tests
L0_Unit_Tests_CPU_ASR:
OPTIONAL_L0_Unit_Tests_CPU_ASR:
needs: [cicd-test-container-setup]
uses: ./.github/workflows/_test_template.yml
if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Unit_Tests_CPU_ASR') || needs.cicd-test-container-setup.outputs.all == 'true'
if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'OPTIONAL_L0_Unit_Tests_CPU_ASR') || needs.cicd-test-container-setup.outputs.all == 'true'
with:
RUNNER: self-hosted-azure-cpu
TIMEOUT: 20
SCRIPT: |
CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/collections/asr -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat
IS_OPTIONAL: true

L0_Unit_Tests_CPU_Audio:
needs: [cicd-test-container-setup]
Expand Down Expand Up @@ -456,17 +457,18 @@ jobs:
with:
RUNNER: self-hosted-azure
SCRIPT: |
mkdir /tmp/${{ github.run_id }}
export PYTHONPATH=/home/TestData/multimodal/video_neva/LLaVA:$PYTHONPATH
CUDA_VISIBLE_DEVICES=0 python examples/multimodal/multimodal_llm/neva/convert_llava_to_neva.py \
--in-file /home/TestData/multimodal/video_neva/Llama-3-VILA1.5-8B/llm \
--mm-projector-ckpt-dir /home/TestData/multimodal/video_neva/Llama-3-VILA1.5-8B/mm_projector \
--mm-vision-tower /home/TestData/multimodal/video_neva/Llama-3-VILA1.5-8B/vision_tower \
--tokenizer-model /home/TestData/multimodal/video_neva/vita-tokenizer/ \
--config-file vita_config.yaml \
--out-file=/tmp/multimodal_video_neva_llama3-ci-hf/ \
--out-file=/tmp/${{ github.run_id }}/llama3_ci.nemo \
--model-type VITA \
--conv-template llama_3
# this test is using a 7B model which is too large for GitHub CI
# replace the model in this test with a toy model or move the test
# to the nightly CI
Expand Down Expand Up @@ -637,9 +639,7 @@ jobs:
model.optim.sched.warmup_steps=1 \
model.data.data_prefix=[1.0,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings \
exp_manager.exp_dir=examples/nlp/megatron_llama_distill
AFTER_SCRIPT: |
rm -rf examples/nlp/megatron_llama_distill
exp_manager.exp_dir=/tmp/megatron_llama_distill
L2_Prune_Width_Llama2:
needs: [cicd-test-container-setup]
Expand Down Expand Up @@ -678,9 +678,7 @@ jobs:
trainer.devices=1 \
trainer.accelerator="gpu" \
+trainer.fast_dev_run=True \
exp_manager.exp_dir=examples/asr/speech_to_text_results
AFTER_SCRIPT: |
rm -rf examples/asr/speech_to_text_results
exp_manager.exp_dir=/tmp/speech_to_text_results
ASR_dev_run_Speech_to_Text_WPE_-_CitriNet:
needs: [cicd-test-container-setup]
Expand All @@ -698,9 +696,7 @@ jobs:
trainer.devices=1 \
trainer.accelerator="gpu" \
+trainer.fast_dev_run=True \
exp_manager.exp_dir=examples/asr/speech_to_text_wpe_results
AFTER_SCRIPT: |
rm -rf examples/asr/speech_to_text_wpe_results
exp_manager.exp_dir=/tmp/speech_to_text_wpe_results
ASR_dev_run_Speech_Pre-training_-_CitriNet:
needs: [cicd-test-container-setup]
Expand All @@ -716,9 +712,7 @@ jobs:
trainer.devices=1 \
trainer.accelerator="gpu" \
+trainer.fast_dev_run=True \
exp_manager.exp_dir=examples/asr/speech_pre_training_results
AFTER_SCRIPT: |
rm -rf examples/asr/speech_pre_training_results
exp_manager.exp_dir=/tmp/speech_pre_training_results
ASR_dev_run_Speech_To_Text_Finetuning:
needs: [cicd-test-container-setup]
Expand All @@ -736,9 +730,7 @@ jobs:
trainer.devices=1 \
trainer.accelerator="gpu" \
+trainer.fast_dev_run=True \
exp_manager.exp_dir=examples/asr/speech_finetuning_results
AFTER_SCRIPT: |
rm -rf examples/asr/speech_finetuning_results
exp_manager.exp_dir=/tmp/speech_finetuning_results
ASR_dev_run_Speech_To_Text_HF_Finetuning:
needs: [cicd-test-container-setup]
Expand Down Expand Up @@ -774,9 +766,7 @@ jobs:
trainer.devices=1 \
trainer.accelerator="gpu" \
+trainer.fast_dev_run=True \
exp_manager.exp_dir=examples/asr/speech_finetuning_results
AFTER_SCRIPT: |
rm -rf examples/asr/speech_finetuning_results
exp_manager.exp_dir=/tmp/speech_finetuning_results
ASR_dev_run_Speech_to_Text_WPE_-_Conformer:
needs: [cicd-test-container-setup]
Expand All @@ -796,9 +786,7 @@ jobs:
trainer.devices=1 \
trainer.accelerator="gpu" \
+trainer.fast_dev_run=True \
exp_manager.exp_dir=examples/asr/speech_to_text_wpe_conformer_results
AFTER_SCRIPT: |
rm -rf examples/asr/speech_to_text_wpe_conformer_results
exp_manager.exp_dir=/tmp/speech_to_text_wpe_conformer_results
# L2: ASR dev run - part two
ASR_dev_run-part_two_Speech_to_Text_WPE_-_Squeezeformer:
Expand All @@ -820,9 +808,7 @@ jobs:
trainer.devices=1 \
trainer.accelerator="gpu" \
+trainer.fast_dev_run=True \
exp_manager.exp_dir=examples/asr/speech_to_text_wpe_squeezeformer_results
AFTER_SCRIPT: |
rm -rf examples/asr/speech_to_text_wpe_squeezeformer_results
exp_manager.exp_dir=/tmp/speech_to_text_wpe_squeezeformer_results
L2_Speech_to_Text_EMA:
needs: [cicd-test-container-setup]
Expand All @@ -838,9 +824,7 @@ jobs:
trainer.accelerator="gpu" \
+trainer.fast_dev_run=True \
+exp_manager.ema.enable=True \
exp_manager.exp_dir=examples/asr/speech_to_text_results
AFTER_SCRIPT: |
rm -rf examples/asr/speech_to_text_results
exp_manager.exp_dir=/tmp/speech_to_text_results
L2_Speech_to_Text_AED:
needs: [cicd-test-container-setup]
Expand Down Expand Up @@ -879,9 +863,7 @@ jobs:
trainer.devices=1 \
trainer.accelerator="gpu" \
+trainer.fast_dev_run=True \
exp_manager.exp_dir=examples/asr/speech_to_text_aed_results
AFTER_SCRIPT: |
rm -rf examples/asr/speech_to_text_results
exp_manager.exp_dir=/tmp/speech_to_text_aed_results
# L2: Speaker dev run
L2_Speaker_dev_run_Speaker_Recognition:
Expand All @@ -901,9 +883,7 @@ jobs:
trainer.devices=1 \
trainer.accelerator="gpu" \
+trainer.fast_dev_run=True \
exp_manager.exp_dir=examples/speaker_tasks/recognition/speaker_recognition_results
AFTER_SCRIPT: |
rm -rf examples/speaker_tasks/recognition/speaker_recognition_results
exp_manager.exp_dir=/tmp/speaker_recognition_results
L2_Speaker_dev_run_Speaker_Diarization:
needs: [cicd-test-container-setup]
Expand All @@ -923,9 +903,7 @@ jobs:
trainer.devices=1 \
trainer.accelerator="gpu" \
+trainer.fast_dev_run=True \
exp_manager.exp_dir=examples/speaker_tasks/diarization/speaker_diarization_results
AFTER_SCRIPT: |
rm -rf examples/speaker_tasks/diarization/speaker_diarization_results
exp_manager.exp_dir=/tmp/speaker_diarization_results
L2_Speaker_dev_run_Speech_to_Label:
needs: [cicd-test-container-setup]
Expand All @@ -948,9 +926,7 @@ jobs:
~model.preprocessor.n_mels \
~model.preprocessor.n_mfcc \
~model.preprocessor.n_fft \
exp_manager.exp_dir=examples/asr/speech_to_label_results
AFTER_SCRIPT: |
rm -rf examples/asr/speech_to_label_results
exp_manager.exp_dir=/tmp/speech_to_label_results
L2_Speaker_dev_run_Speaker_Diarization_with_ASR_Inference:
needs: [cicd-test-container-setup]
Expand All @@ -968,9 +944,7 @@ jobs:
diarizer.speaker_embeddings.parameters.multiscale_weights=[1.0] \
diarizer.asr.model_path=QuartzNet15x5Base-En \
diarizer.asr.parameters.asr_based_vad=True \
diarizer.out_dir=examples/speaker_tasks/diarization/speaker_diarization_asr_results
AFTER_SCRIPT: |
rm -rf examples/speaker_tasks/diarization/speaker_diarization_asr_results
diarizer.out_dir=/tmp/speaker_diarization_asr_results
L2_Speaker_dev_run_Clustering_Diarizer_Inference:
needs: [cicd-test-container-setup]
Expand All @@ -987,9 +961,7 @@ jobs:
diarizer.speaker_embeddings.parameters.shift_length_in_sec=0.75 \
diarizer.speaker_embeddings.parameters.multiscale_weights=null \
diarizer.vad.model_path=/home/TestData/an4_diarizer/MatchboxNet_VAD_3x2.nemo \
diarizer.out_dir=examples/speaker_tasks/diarization/clustering_diarizer_results
AFTER_SCRIPT: |
rm -rf examples/speaker_tasks/diarization/clustering_diarizer_results
diarizer.out_dir=/tmp/clustering_diarizer_results
L2_Speaker_dev_run_Neural_Diarizer_Inference:
needs: [cicd-test-container-setup]
Expand All @@ -1003,9 +975,7 @@ jobs:
diarizer.msdd_model.model_path=/home/TestData/an4_diarizer/diar_msdd_telephonic.nemo \
diarizer.speaker_embeddings.parameters.save_embeddings=True \
diarizer.vad.model_path=/home/TestData/an4_diarizer/MatchboxNet_VAD_3x2.nemo \
diarizer.out_dir=examples/speaker_tasks/diarization/neural_diarizer_results
AFTER_SCRIPT: |
rm -rf examples/speaker_tasks/diarization/neural_diarizer_results
diarizer.out_dir=/tmp/neural_diarizer_results
L2_Speaker_dev_run_Multispeaker_ASR_Data_Simulation:
needs: [cicd-test-container-setup]
Expand All @@ -1018,11 +988,9 @@ jobs:
--config-path=conf --config-name=data_simulator.yaml \
data_simulator.random_seed=42 \
data_simulator.manifest_filepath=/home/TestData/LibriSpeechShort/dev-clean-align-short.json \
data_simulator.outputs.output_dir=./test_simulator \
data_simulator.outputs.output_dir=/tmp/test_simulator \
data_simulator.session_config.num_sessions=2 \
data_simulator.session_config.session_length=60
AFTER_SCRIPT: |
rm -rf ./test_simulator
# L2: ASR Multi-dataloader dev run
L2_ASR_Multi-dataloader_dev_run_Speech_to_Text_multi-dataloader:
Expand All @@ -1040,9 +1008,7 @@ jobs:
trainer.max_epochs=1 \
trainer.max_steps=1 \
+trainer.num_sanity_val_steps=1 \
exp_manager.exp_dir=examples/asr/speech_to_text_results
AFTER_SCRIPT: |
rm -rf examples/asr/speech_to_text_results
exp_manager.exp_dir=/tmp/speech_to_text_results
L2_ASR_Multi-dataloader_dev_run_Speech_to_Label_multi-dataloader:
needs: [cicd-test-container-setup]
Expand All @@ -1066,9 +1032,7 @@ jobs:
~model.preprocessor.n_mels \
~model.preprocessor.n_mfcc \
~model.preprocessor.n_fft \
exp_manager.exp_dir=examples/asr/speech_to_label_results
AFTER_SCRIPT: |
rm -rf examples/asr/speech_to_label_results
exp_manager.exp_dir=/tmp/speech_to_label_results
# L2: ASR Adapters
L2_ASR_Adapters_Linear_Adapters:
Expand All @@ -1088,9 +1052,7 @@ jobs:
trainer.devices=1 \
trainer.accelerator="gpu" \
+trainer.fast_dev_run=True \
exp_manager.exp_dir=examples/asr/speech_to_text_adapters_results
AFTER_SCRIPT: |
rm -rf examples/asr/speech_to_text_adapters_results
exp_manager.exp_dir=/tmp/speech_to_text_adapters_results
L2_ASR_Adapters_RelPos_MHA_Adapters:
needs: [cicd-test-container-setup]
Expand All @@ -1110,9 +1072,7 @@ jobs:
trainer.devices=1 \
trainer.accelerator="gpu" \
+trainer.fast_dev_run=True \
exp_manager.exp_dir=examples/asr/speech_to_text_adapters_mha_results
AFTER_SCRIPT: |
rm -rf examples/asr/speech_to_text_adapters_mha_results
exp_manager.exp_dir=/tmp/speech_to_text_adapters_mha_results
# L2: OOMptimizer
L2_Speech_Estimate_Duration_Bins:
Expand Down Expand Up @@ -1189,10 +1149,8 @@ jobs:
python examples/asr/transcribe_speech.py \
pretrained_name="QuartzNet15x5Base-En" \
audio_dir="/home/TestData/an4_transcribe/test_subset/" \
output_filename="stt_test_res.json" \
output_filename="/tmp/stt_test_res.json" \
amp=true
AFTER_SCRIPT: |
rm -rf stt_test_res.json
# L2: Speech Transcription
L2_Speech_Transcription_Canary_Transcribe_Full_Manifest:
Expand All @@ -1204,15 +1162,15 @@ jobs:
SCRIPT: |
python examples/asr/transcribe_speech.py \
dataset_manifest=/home/TestData/asr/canary/dev-other-wav-10-canary-fields.json \
output_filename=preds.json \
output_filename=/tmp/preds.json \
batch_size=10 \
pretrained_name=nvidia/canary-1b \
num_workers=0 \
amp=false \
compute_dtype=bfloat16 \
matmul_precision=medium
AFTER_SCRIPT: |
rm -rf preds.json transcribe.log
rm -rf /tmp/preds.json transcribe.log
L2_Speech_Transcription_Canary_Transcribe_With_Prompt:
needs: [cicd-test-container-setup]
Expand Down Expand Up @@ -1281,14 +1239,12 @@ jobs:
/bin/bash run_segmentation.sh \
--MODEL_NAME_OR_PATH="stt_en_citrinet_512_gamma_0_25" \
--DATA_DIR=/home/TestData/ctc_segmentation/eng \
--OUTPUT_DIR=/home/TestData/ctc_segmentation/eng/output${TIME} \
--OUTPUT_DIR=/tmp/ctc_seg_en/output${TIME} \
--LANGUAGE=en \
--USE_NEMO_NORMALIZATION="TRUE" && \
python /home/TestData/ctc_segmentation/verify_alignment.py \
-r /home/TestData/ctc_segmentation/eng/eng_valid_segments_1.7.txt \
-g /home/TestData/ctc_segmentation/eng/output${TIME}/verified_segments/nv_test_segments.txt;
AFTER_SCRIPT: |
rm -rf /home/TestData/ctc_segmentation/eng/output${TIME}
-g /tmp/ctc_seg_en/output${TIME}/verified_segments/nv_test_segments.txt;
L2_Segmentation_Tool_Parallel_ctc_segmentation_test_L2_Ru_QN_with_mp3:
needs: [cicd-test-container-setup]
Expand All @@ -1302,14 +1258,12 @@ jobs:
/bin/bash run_segmentation.sh \
--MODEL_NAME_OR_PATH=/home/TestData/ctc_segmentation/QuartzNet15x5-Ru-e512-wer14.45.nemo \
--DATA_DIR=/home/TestData/ctc_segmentation/ru \
--OUTPUT_DIR=/home/TestData/ctc_segmentation/ru/output${TIME} \
--OUTPUT_DIR=/tmp/ctc_seg_ru/output${TIME} \
--LANGUAGE=ru \
--ADDITIONAL_SPLIT_SYMBOLS=";" && \
python /home/TestData/ctc_segmentation/verify_alignment.py \
-r /home/TestData/ctc_segmentation/ru/valid_ru_segments_1.7.txt \
-g /home/TestData/ctc_segmentation/ru/output${TIME}/verified_segments/ru_segments.txt;
rm -rf /home/TestData/ctc_segmentation/eng/output${TIME}
-g /tmp/ctc_seg_ru/output${TIME}/verified_segments/ru_segments.txt;
# L2: G2P Models
L2_G2P_Models_G2P_Conformer_training_evaluation_and_inference:
Expand Down Expand Up @@ -1687,9 +1641,9 @@ jobs:
model.tokenizer.tokenizer_model=/home/TestData/nlp/wikitext-2/tokenizer_bpe_v3193/tokenizer.model \
model.mask_prob=0.15 \
model.short_seq_prob=0.1 \
exp_manager.exp_dir=PretrainingBERTFromText;
AFTER_SCRIPT: |
rm -f /home/TestData/nlp/wikitext-2/*.pkl
exp_manager.exp_dir=/tmp/PretrainingBERTFromText;
# AFTER_SCRIPT: |
# rm -f /home/TestData/nlp/wikitext-2/*.pkl
#rm -rf examples/nlp/language_modeling/PretrainingBERTFromText

L2_Pretraining_BERT_from_Preprocessed:
Expand Down Expand Up @@ -5245,7 +5199,7 @@ jobs:
#- OPTIONAL_L0_Unit_Tests_GPU_Lightning
- L0_Unit_Tests_GPU_Others

- L0_Unit_Tests_CPU_ASR
#- OPTIONAL_L0_Unit_Tests_CPU_ASR
- L0_Unit_Tests_CPU_Audio
- L0_Unit_Tests_CPU_Common
- L0_Unit_Tests_CPU_LLM
Expand Down

0 comments on commit 5e22a30

Please sign in to comment.