improve error and debug messages in model connector #7767
Triggered via pull request
January 29, 2025 00:32
Status
Failure
Total duration
2h 43m 21s
Artifacts
20
cicd-main.yml
on: pull_request
pre-flight
0s
L0_Unit_Tests_GPU_ASR
/
main
14m 28s
L0_Unit_Tests_GPU_Audio
/
main
1m 52s
L0_Unit_Tests_GPU_Common
/
main
3m 21s
L0_Unit_Tests_GPU_LLM
/
main
2m 40s
L0_Unit_Tests_GPU_Multimodal
/
main
2m 36s
L0_Unit_Tests_GPU_NLP
/
main
3m 45s
L0_Unit_Tests_GPU_TTS
/
main
2m 6s
L0_Unit_Tests_GPU_Hydra
/
main
3m 9s
L0_Unit_Tests_GPU_Lightning
/
main
3m 44s
L0_Unit_Tests_GPU_Others
/
main
4m 19s
L0_Unit_Tests_CPU_ASR
/
main
9m 19s
L0_Unit_Tests_CPU_Audio
/
main
1m 24s
L0_Unit_Tests_CPU_Common
/
main
2m 22s
L0_Unit_Tests_CPU_LLM
/
main
1m 13s
L0_Unit_Tests_CPU_Multimodal
/
main
1m 4s
L0_Unit_Tests_CPU_NLP
/
main
1m 40s
L0_Unit_Tests_CPU_TTS
/
main
2m 15s
L0_Unit_Tests_CPU_Core
/
main
4m 1s
L0_Unit_Tests_CPU_Hydra
/
main
1m 37s
L0_Unit_Tests_CPU_Lightning
/
main
1m 31s
L0_Unit_Tests_CPU_Others
/
main
1m 42s
cicd-import-tests
1m 27s
L2_Community_LLM_Checkpoints_tests_Bert
/
main
2m 8s
L2_Community_LLM_Checkpoints_tests_Mamba2
/
main
1m 53s
L2_Community_LLM_Checkpoints_tests_Llama
/
main
2m 8s
L2_Community_LLM_Checkpoints_tests_Llama3
/
main
1m 8s
L2_Community_LLM_Checkpoints_tests_StarCoder
/
main
1m 58s
L2_Community_LLM_Checkpoints_tests_Falcon
/
main
1m 53s
L2_Community_vita_Checkpoints_tests_Llama3
/
main
4m 7s
L2_PTQ_Llama2_FP8
/
main
2m 58s
L2_Distill_Llama2
/
main
3m 27s
L2_Prune_Width_Llama2
/
main
2m 34s
L2_Prune_Depth_Llama2
/
main
2m 13s
ASR_dev_run_Speech_to_Text
/
main
1m 5s
ASR_dev_run_Speech_to_Text_WPE_-_CitriNet
/
main
1m 10s
ASR_dev_run_Speech_Pre-training_-_CitriNet
/
main
53s
ASR_dev_run_Speech_To_Text_Finetuning
/
main
1m 0s
ASR_dev_run_Speech_To_Text_HF_Finetuning
/
main
2m 32s
ASR_dev_run_Speech_to_Text_WPE_-_Conformer
/
main
1m 40s
ASR_dev_run-part_two_Speech_to_Text_WPE_-_Squeezeformer
/
main
53s
L2_Speech_to_Text_EMA
/
main
2m 2s
L2_Speech_to_Text_AED
/
main
1m 47s
L2_Speaker_dev_run_Speaker_Recognition
/
main
50s
L2_Speaker_dev_run_Speaker_Diarization
/
main
54s
L2_Speaker_dev_run_EndtoEnd_Speaker_Diarization_Sortformer
/
main
1m 47s
L2_Speaker_dev_run_EndtoEnd_Diarizer_Inference
/
main
1m 40s
L2_Speaker_dev_run_Speech_to_Label
/
main
52s
L2_Speaker_dev_run_Speaker_Diarization_with_ASR_Inference
/
main
1m 40s
L2_Speaker_dev_run_Clustering_Diarizer_Inference
/
main
1m 52s
L2_Speaker_dev_run_Neural_Diarizer_Inference
/
main
1m 51s
L2_Speaker_dev_run_Multispeaker_ASR_Data_Simulation
/
main
1m 40s
L2_ASR_Multi-dataloader_dev_run_Speech_to_Text_multi-dataloader
/
main
1m 55s
L2_ASR_Multi-dataloader_dev_run_Speech_to_Label_multi-dataloader
/
main
48s
L2_ASR_Adapters_Linear_Adapters
/
main
1m 39s
L2_ASR_Adapters_RelPos_MHA_Adapters
/
main
51s
L2_Speech_Estimate_Duration_Bins
/
main
1m 57s
L2_Speech_Batch_Size_OOMptimizer
/
main
2m 52s
L2_Speech_Batch_Size_OOMptimizer_Canary
/
main
2m 13s
L2_Speech_Transcription_Speech_to_Text_Transcribe
/
main
1m 36s
L2_Speech_Transcription_Canary_Transcribe_Full_Manifest
/
main
2m 52s
L2_Speech_Transcription_Canary_Transcribe_With_Prompt
/
main
2m 55s
L2_Speech_Transcription_Canary_Transcribe_Audio_Dir
/
main
2m 49s
L2_Segmentation_Tool_Parallel_ctc_segmentation_test_L2_Eng_CitriNet_with_wav
/
main
3m 45s
L2_Segmentation_Tool_Parallel_ctc_segmentation_test_L2_Ru_QN_with_mp3
/
main
3m 14s
L2_G2P_Models_G2P_Conformer_training_evaluation_and_inference
/
main
2m 9s
L2_G2P_Models_HeteronymClassificationModel_training_evaluation_and_inference
/
main
2m 53s
L2_Pretraining_BERT_pretraining_from_Text
/
main
59s
L2_Pretraining_BERT_from_Preprocessed
/
main
1m 4s
L2_NMT_Attention_is_All_You_Need_Training_NMT_Training_Post-LN
/
main
1m 27s
L2_NMT_Attention_is_All_You_Need_Training_NMT_Training_Pre-LN
/
main
1m 8s
L2_NMT_Attention_is_All_You_Need_Training_NMT_Multi-Validation
/
main
1m 21s
L2_NMT_Attention_is_All_You_Need_Inference
/
main
2m 2s
L2_NMT_Attention_is_All_You_Need_Finetuning
/
main
1m 54s
L2_NMT_Tarred_Dataset_Creation_Auto_Tarred_Dataset_Creation
/
main
1m 3s
L2_NMT_Tarred_Dataset_Creation_Script_Tarred_Dataset_Creation
/
main
1m 55s
L2_Megatron_NMT_Training_TP2
/
main
4m 42s
L2_Megatron_Bert_Pretraining_and_Resume_Training_with_Pipeline_Parallelism
/
main
4m 7s
L2_Megatron_Core_Bert_Pretraining_and_Resume_Training
/
main
4m 26s
L2_RAG_Pipeline_Indexing
/
main
2m 3s
L2_RAG_Pipeline_Generating
/
main
2m 30s
L2_Megatron_GPT_Pretraining_and_Resume_Training_TP2
/
main
3m 55s
L2_Megatron_GPT_Skip_Train
/
main
3m 24s
L2_Megatron_GPT_with_Rope_Pretraining_and_Resume_Training_TP2
/
main
3m 43s
L2_Megatron_LM_To_NeMo_Conversion
/
main
4m 24s
L2_Megatron_GPT_with_ResetLR_Pretraining_and_Resume_Training_TP2
/
main
4m 50s
L2_Megatron_GPT_with_Drop_Optimizer_States_TP2
/
main
3m 50s
L2_Megatron_GPT_with_ALiBi_Pretraining_and_Resume_Training_TP2
/
main
3m 41s
L2_Megatron_GPT_with_KERPLE_Pretraining_and_Resume_Training_TP2
/
main
3m 27s
L2_Megatron_GPT_Auto_Configurator_TP1_PP1_MBS124
/
main
2m 41s
L2_Megatron_GPT_Finetuning_PP2
/
main
4m 50s
L2_Megatron_GPT_Finetuning_StarCoder_PP1
/
main
1m 30s
L2_Megatron_GPT_Reranker
/
main
1m 56s
L2_Megatron_GPT_Embedding
/
main
2m 25s
L2_Megatron_GPT_PEFT_Lora_PP2_O2
/
main
3m 8s
L2_Megatron_GPT_PEFT_Lora_TP2_O1
/
main
2m 58s
L2_Megatron_GPT_PEFT_Lora_TP2SP1
/
main
1m 11s
L2_Megatron_GPT_Eval
/
main
2m 2s
L2_Megatron_GPT_Eval_PP2
/
main
2m 47s
L2_Megatron_GPT_SFT_Eval_inference_seq_len_greaterThan_training_seq_len
/
main
2m 12s
L2_Megatron_Change_Partitions_Reduce_TP_Num_Partitions_-2_to_1-_and_PP_Num_Partitions_-1_to_2
/
main
2m 22s
L2_Megatron_Change_Partitions_Increase_TP_Num_Partitions_-2_to_4-_and_PP_Num_Partitions_-1_to_2
/
main
2m 16s
L2_Megatron_Core_T5_Pretraining_and_Resume_Training_TP2
/
main
3m 51s
L2_Megatron_T5_with_ALiBi_Pretraining_and_Resume_Training_TP2
/
main
3m 17s
L2_Megatron_T5_with_KERPLE_Pretraining_and_Resume_Training_TP2
/
main
3m 16s
L2_Megatron_T5_w_Mixture_of_Expert_Pretraining
/
main
2m 32s
L2_Megatron_UL2_Pretraining_and_Resume_Training_TP2
/
main
3m 29s
L2_Megatron_Core_T5_Eval
/
main
2m 7s
L2_Megatron_Core_T5_PEFT_Lora_TP2
/
main
4m 12s
L2_VLM_HF_Transformer_PEFT
/
main
1m 36s
L2_VLM_HF_Transformer_PEFT_FSDP
/
main
2m 58s
L2_VLM_HF_Transformer_PEFT_4bit
/
main
1m 40s
L2_VLM_HF_Transformer_SFT_FSDP2
/
main
52s
L2_HF_Transformer_PEFT
/
main
1m 42s
L2_HF_Transformer_PEFT_nemorun
/
main
2m 1s
L2_HF_Transformer_PEFT_2gpu
/
main
3m 0s
L2_HF_Transformer_PEFT_2gpu_nemorun
/
main
3m 4s
L2_HF_Transformer_SFT_2gpu
/
main
6m 50s
L2_HF_Transformer_SFT_FSDP2_2gpu
/
main
1m 58s
L2_HF_Transformer_PT_2gpu
/
main
6m 11s
L2_HF_Transformer_SFT_2gpu_nemorun
/
main
3m 46s
L2_HF_Transformer_SFT_2gpu_nemorun_fsdp2
/
main
1m 53s
L2_HF_Transformer_PT_2gpu_nemorun
/
main
3m 11s
L2_HF_Transformer_PT
/
main
4m 43s
L2_HF_Transformer_PT_nemorun
/
main
2m 0s
L2_HF_Transformer_SFT
/
main
5m 15s
L2_HF_Transformer_SFT_nemorun
/
main
2m 35s
L2_HF_Transformer_SFT_TE_Acceleration
/
main
5m 59s
L2_HF_Transformer_PT_TE_Acceleration
/
main
5m 27s
L2_Megatron_Mock_Data_Generation_MockGPTDataset
/
main
3m 25s
L2_Megatron_Mock_Data_Generation_MockT5Dataset
/
main
2m 15s
L2_TTS_Fast_dev_runs_1_Tacotron_2
/
main
1m 12s
L2_TTS_Fast_dev_runs_1_WaveGlow
/
main
1m 47s
L2_TTS_Fast_dev_runs_1_FastPitch
/
main
2m 5s
L2_TTS_Fast_dev_runs_1_Hifigan
/
main
1m 49s
Speech_Checkpoints_tests
/
main
2m 52s
L2_Stable_Diffusion_Training
/
main
5m 12s
L2_NeMo_2_GPT_Pretraining_no_transformer_engine
/
main
4m 1s
L2_NeMo_2_llama3_pretraining_recipe
/
main
4m 45s
L2_NeMo_2_llama3_fault_tolerance_plugin
/
main
7m 41s
L2_NeMo_2_llama3_straggler_detection
/
main
3m 5s
L2_NeMo_2_GPT_DDP_Param_Parity_check
/
main
3m 19s
L2_NeMo_2_SSM_Pretraining
/
main
4m 0s
L2_NeMo_2_SSM_Finetuning
/
main
3m 31s
L2_NeMo_2_HF_MODEL_IMPORT
/
main
4m 46s
L2_NeMo_2_jit_callback
/
main
1m 43s
L2_NeMo_2_T5_Pretraining
/
main
5m 35s
L2_NeMo_2_T5_Finetuning
/
main
4m 34s
L2_NeMo_2_T5_LoRA
/
main
4m 39s
L2_NeMo_2_NEVA_MOCK_TRAINING
/
main
2m 44s
L2_NeMo_2_NEVA_MOCK_PACKED_TRAINING
/
main
2m 52s
L2_NeMo_2_MLLAMA_MOCK_TRAINING
/
main
8m 0s
L2_NeMo_2_Mixtral_Pretraining
/
main
2m 12s
L2_NeMo_2_GPT_SFT_TP1PP1_MBS1
/
main
4m 27s
L2_NeMo_2_GPT_SFT_TP1PP1_MBS2
/
main
4m 38s
L2_NeMo_2_GPT_SFT_TP1PP2_MBS2
/
main
4m 36s
L2_NeMo_2_GPT_SFT_TP2PP1_MBS2
/
main
4m 30s
L2_NeMo_2_GPT_SFT_TP1PP1_MBS1_PACKED
/
main
4m 23s
L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1
/
main
3m 46s
L2_NeMo_2_GPT_LoRA_TP1PP1_MBS2
/
main
3m 45s
L2_NeMo_2_GPT_LoRA_TP1PP2_MBS2
/
main
4m 3s
L2_NeMo_2_GPT_LoRA_TP2PP1_MBS2
/
main
3m 50s
L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_PACKED
/
main
3m 37s
L2_NeMo_2_GPT_DoRA_TP1PP1_MBS1_PACKED
/
main
3m 34s
L2_NeMo_2_GPT_CLoRA_TP1PP1_MBS1_PACKED
/
main
3m 35s
L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_Chat
/
main
3m 59s
L2_NeMo_2_Mixtral_LoRA_EP2PP1_MBS2
/
main
2m 27s
L2_NeMo_2_Mixtral_LoRA_TP1PP1_MBS1
/
main
2m 30s
L2_NeMo_2_Mistral_LoRA_TP1PP1_MBS1
/
main
2m 21s
L2_NeMo_2_Mistral_LoRA_TP2PP1_MBS1
/
main
2m 39s
L2_NEMO_2_LoRA_MERGE
/
main
1m 58s
L2_NEMO_2_LoRA_Export
/
main
50s
L2_NEMO_2_LoRA_Inference
/
main
59s
L2_NeMo_2_NeMo_Mcore_Mixtral_bitexact
/
main
3m 16s
L2_NeMo_2_PTQ_Llama2_FP8
/
main
3m 7s
L2_NeMo_2_Export_In_Framework
/
main
3m 6s
L2_NeMo_2_LLAVA_NEXT_MOCK_TRAINING
/
main
1m 55s
L2_NeMo_2_VLLM_EXPORT
/
main
4m 12s
OPTIONAL_L0_Unit_Tests_GPU_Core
/
main
21m 18s
L0_Setup_Test_Data_And_Models
/
main
1m 40s
OPTIONAL_L2_Megatron_GPT_Pretraining_and_Resume_Training_PP2
/
main
4m 14s
OPTIONAL_L2_Megatron_T5_Pretraining_and_Resume_Training_PP2
/
main
3m 13s
OPTIONAL_L2_NeMo_2_Mixtral_LoRA_TP2PP1_MBS1
/
main
2m 37s
L0_Unit_Tests_Coverage
21s
Nemo_CICD_Test
5s
Annotations
2 errors
OPTIONAL_L0_Unit_Tests_GPU_Core / main: The action 'Run main script' has timed out after 20 minutes.
OPTIONAL_L2_Megatron_T5_Pretraining_and_Resume_Training_PP2 / main: Process completed with exit code 137.
Artifacts
Produced during runtime
Name | Size | Digest | |
---|---|---|---|
coverage-13022195636-0674f7e9-3d5d-46c4-86c6-26583e67a231 (Expired) | 62.4 KB | sha256:2bb0caba88de947ac0c30fb24a7bdefa7f317e0e5a2ab90b9359d9a19fe5a6c0 | |
coverage-13022195636-07f14853-27c1-489e-b56d-7ce6999c8cb2 (Expired) | 77 KB | sha256:1618e55da6e3846d478b61b0fa86a3aa8603fea88a883daf062d4595627a3ed4 | |
coverage-13022195636-2fec6789-3fb2-40b2-8f4b-cbc9247f68da (Expired) | 61.3 KB | sha256:82c946fabf5404bcbff4b44c028df3675e7da90ebf1d9696ced549cb61de2295 | |
coverage-13022195636-395bef9e-dfa8-49ce-8d44-ca8c0c0ca545 (Expired) | 76.9 KB | sha256:2b80f3eb8d9b63dcf8bf6004611ed618e4d26b82e2a2a30fb1b642c832662ed1 | |
coverage-13022195636-3d6ebea7-4def-4b83-8b72-02e1a66d0f7e (Expired) | 77 KB | sha256:a34125c9c3b13e0fc648833afccc5eb1cb857cf6628f3892679d28d49df6fe90 | |
coverage-13022195636-4142a872-ad76-4032-9259-fd5d36dc77fc (Expired) | 77.9 KB | sha256:661b7ad8437a484e5907ee23ee88ec5c9a05e28b1d20047852e0e9221d5ab100 | |
coverage-13022195636-52271bc6-fc7b-4702-8de7-a78d6d27bee3 (Expired) | 50.7 KB | sha256:a47982a09db5ea5eec3c13200d0cd250401adab1c582409283243aa7bc634896 | |
coverage-13022195636-604e3857-6be0-4032-9655-87c8af1b50dc (Expired) | 77.7 KB | sha256:ec838f78eeab8c4f44df6fe0af1e3d10f91f5a2fb505d8c04fc59d1ea9459215 | |
coverage-13022195636-64a20143-08eb-4af5-a1b6-a1ce89dc9c94 (Expired) | 67.3 KB | sha256:e0d781a8826e943d0cb12bfe558137346945b29820aa251fbeb1e6004cf03ead | |
coverage-13022195636-6dcac780-56a3-4f0e-bc1f-63e33bb229fc (Expired) | 64 KB | sha256:48e2ca1fe859770ff3e8af4594905f5f18c6834788c871dd4d04367b703c0ab7 | |
coverage-13022195636-798d611a-23c5-487d-9763-58a99def86ac (Expired) | 76.6 KB | sha256:e0124a44487cc4504e020b38c116087392771f3eb1d42ff3238bcf6564b5aa98 | |
coverage-13022195636-7ca6a093-ef94-4d70-8771-28d44842e089 (Expired) | 64.5 KB | sha256:43ebc452469ef4a3448dc72ff26c9d41e477c09942e7a2ff82ab976a0b3065ed | |
coverage-13022195636-8317a6c8-b479-495b-8586-1eaa111b189f (Expired) | 64.7 KB | sha256:acfc337d287e119d57a8a7ff365960d8c0ff46cac9ed79b0b7ab0ba056aee2d6 | |
coverage-13022195636-85b4ee67-465f-4b5e-85cd-8fa9ce471aed (Expired) | 78.3 KB | sha256:027666d23412f25bc6dd13f1d3c41f82b8f89582fc85dfa0ec899981b0e58c9b | |
coverage-13022195636-ad2e6f6a-748f-4fbf-9413-abb7627b621b (Expired) | 78.3 KB | sha256:625cbb636f29b762319fd7100526f8ac5207ce3ef99e60c17ff9646336f0b2cf | |
coverage-13022195636-b11dbcbd-2e1d-49cd-8a04-f9e9d85ec48e (Expired) | 51.2 KB | sha256:a1860c5f3ae7b5702bbefc340456acf92ab3eea9c5ccd562869084b7fbccbeb5 | |
coverage-13022195636-b4791c8c-e384-40ae-8f28-e338e01f1dd1 (Expired) | 65.1 KB | sha256:4d8009637340fae304527bb2acd245cb5e4dd05c88febc0e55e7107aa8a0a912 | |
coverage-13022195636-b974f038-dd7b-4c3f-8269-aa08b50028e5 (Expired) | 76.5 KB | sha256:d275ce307e4c03d75e4d5e6d4a5e8bf3033b30fb08ddc81abce0db78f87b2079 | |
coverage-13022195636-da8d074b-5c9d-4672-bf18-405920e6a12a (Expired) | 77.2 KB | sha256:f032df26bc6bf71dde061d4869a6e443c6b8f8cb505117d69fa0e590a452fcb9 | |
coverage-13022195636-e8ba1242-db18-4bb9-b379-fe96fc0c1e32 (Expired) | 76.8 KB | sha256:0afd657e5bc242b6a8c7db42a54f34e5bf07bd17c507c39260b977b9bb8952ee | |