Add and fix test for DeepSpeed / FP8 from config; keep `fp8` mixed precision under DeepSpeed

pstjohn · pstjohn · commit edbe9d569c29 · 2025-07-21T14:36:23.000-07:00
diff --git a/src/accelerate/state.py b/src/accelerate/state.py
@@ -945,7 +945,11 @@ def __init__(
                     "before using any functionality from the `accelerate` library."
                 )
             # deepspeed handles mixed_precision using deepspeed_config
-            self._mixed_precision = "no" if self.distributed_type == DistributedType.DEEPSPEED else mixed_precision
+            self._mixed_precision = (
+                "no"
+                if (self.distributed_type == DistributedType.DEEPSPEED and mixed_precision != "fp8")
+                else mixed_precision
+            )
             if self.distributed_type == DistributedType.XLA and is_torch_xla_available(check_is_tpu=True):
                 if mixed_precision == "bf16":
                     if os.environ.get("ACCELERATE_DOWNCAST_BF16"):
@@ -1035,7 +1039,7 @@ def _check_initialized(self, mixed_precision=None, cpu=None):
 
     @property
     def mixed_precision(self):
-        if self.distributed_type == DistributedType.DEEPSPEED:
+        if self.distributed_type == DistributedType.DEEPSPEED and self._mixed_precision != "fp8":
             config = self.deepspeed_plugin.deepspeed_config
             if config.get("fp16", {}).get("enabled", False):
                 mixed_precision = "fp16"
diff --git a/tests/test_fp8.py b/tests/test_fp8.py
@@ -50,6 +50,8 @@ def can_convert_te_model(from_config=False):
         accelerator_kwargs = {}
 
     accelerator = Accelerator(**accelerator_kwargs)
+    assert accelerator.fp8_enabled, "FP8 is not enabled"
+
     dataloader = torch.utils.data.DataLoader(torch.randn(10, 32), batch_size=2)
     model = torch.nn.Sequential(torch.nn.Linear(32, 32), torch.nn.Linear(32, 16))
     optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
@@ -168,6 +170,35 @@ def test_can_prepare_model_multigpu_deepspeed(self):
             command += ["-m", "tests.test_fp8", "--test_te"]
             run_command(command)
 
+    @require_deepspeed
+    @require_multi_device
+    def test_can_prepare_model_multigpu_deepspeed_from_config(self):  # settings come from a YAML config file
+        os.environ["ZERO_STAGE"] = str(1)  # NOTE(review): never restored after the test; confirm that is intended
+        with tempfile.TemporaryDirectory() as dir_name:  # directory and config file are deleted on exit
+            config_file = Path(dir_name) / "config.yaml"
+            config_file.write_text(
+                textwrap.dedent(
+                    """
+                    distributed_type: "DEEPSPEED"
+                    deepspeed_config:
+                      gradient_clipping: 1.0
+                      gradient_accumulation_steps: 1
+                      offload_optimizer_device: none
+                      offload_param_device: none
+                      zero3_init_flag: false
+                      zero_stage: 1
+                      deepspeed_multinode_launcher: standard
+                    num_processes: 2
+                    mixed_precision: fp8
+                    fp8_config:
+                      backend: TE
+                    """
+                )
+            )
+            command = get_launch_command(config_file=str(config_file), monitor_interval=0.1)
+            command += ["-m", "tests.test_fp8", "--test_te", "--from_config"]  # run this module's TE check
+            run_command(command)  # runs inside the `with`, so the config file still exists
+
 
 @require_torchao
 @require_huggingface_suite