Move TORCH_NCCL_HIGH_PRIORITY to nemo/lightning/run/plugins.py

guyueh1 · guyueh1 · commit 1830c01a9de0 · 2025-07-03T09:21:18.000-07:00
Signed-off-by: Guyue Huang <guyueh@nvidia.com>
diff --git a/nemo/lightning/run/plugins.py b/nemo/lightning/run/plugins.py
@@ -405,6 +405,9 @@ def setup(self, task: run.Partial | run.Script, executor: run.Executor):
                 assert isinstance(self.nccl_pp_comm_chunksize, int) and self.nccl_pp_comm_chunksize > 1
                 executor.env_vars["NCCL_P2P_NET_CHUNKSIZE"] = str(self.nccl_pp_comm_chunksize)
 
+            # Enable high priority for NCCL communications
+            executor.env_vars["TORCH_NCCL_HIGH_PRIORITY"] = "1"
+
         # Improve perf by steering power to tensor cores, may not work on all systems
         if self.enable_vboost and isinstance(executor, run.SlurmExecutor):
             vboost_cmd = self.get_vboost_srun_cmd(executor.nodes, executor.tunnel.job_dir)
diff --git a/scripts/performance/executors.py b/scripts/performance/executors.py
@@ -66,7 +66,6 @@ def slurm_executor(
         "NVTE_FLASH_ATTN": "1",  # Enable Flash Attention, which is needed to enable cuDNN fused attention
         "NVTE_FUSED_ATTN": "1",  # Enable cuDNN fused attention
         "NEMO_LOG_MEMORY_USAGE": "1",  # Print memory allocation
-        "TORCH_NCCL_HIGH_PRIORITY": "1",  # Enable high priority for NCCL communication in pytorch
     }
 
     custom_bash_cmds = [] if custom_bash_cmds is None else custom_bash_cmds

Original file line number	Diff line number	Diff line change
`@@ -66,7 +66,6 @@ def slurm_executor(`
`66`	`66`	`"NVTE_FLASH_ATTN": "1", # Enable Flash Attention, which is needed to enable cuDNN fused attention`
`67`	`67`	`"NVTE_FUSED_ATTN": "1", # Enable cuDNN fused attention`
`68`	`68`	`"NEMO_LOG_MEMORY_USAGE": "1", # Print memory allocation`
`69`		`- "TORCH_NCCL_HIGH_PRIORITY": "1", # Enable high priority for NCCL communication in pytorch`
`70`	`69`	`}`
`71`	`70`
`72`	`71`	`custom_bash_cmds = [] if custom_bash_cmds is None else custom_bash_cmds`