xfail HSDP cases of test_fsdp_full_state_dict_load inside the test body instead of via per-param marks

v-chen_data · v-chen_data · commit bb6150da965f · 2024-06-21T14:13:03.000-07:00
diff --git a/tests/trainer/test_fsdp_checkpoint.py b/tests/trainer/test_fsdp_checkpoint.py
@@ -300,84 +300,13 @@ def _compare_timestamps_between_state_dicts(state_dict1, state_dict2):
         pytest.param(2, 'adam', False, 'amp_bf16', False, True, False, False, False, marks=pytest.mark.world_size(2)),
         pytest.param(2, 'adam', False, 'amp_bf16', False, False, True, False, False, marks=pytest.mark.world_size(2)),
         pytest.param(4, 'adam', False, 'amp_bf16', False, False, False, True, False, marks=pytest.mark.world_size(4)),
-        pytest.param(
-            4,
-            'adam',
-            False,
-            'amp_bf16',
-            False,
-            False,
-            False,
-            False,
-            True,
-            marks=[pytest.mark.world_size(4),
-                   pytest.mark.xfail(reason='Known issue, waiting for composer bump')],
-        ),
-        pytest.param(
-            4,
-            'adamw',
-            False,
-            'amp_bf16',
-            False,
-            False,
-            False,
-            False,
-            True,
-            marks=[pytest.mark.world_size(4),
-                   pytest.mark.xfail(reason='Known issue, waiting for composer bump')],
-        ),
-        pytest.param(
-            4,
-            'adam',
-            True,
-            'amp_bf16',
-            False,
-            False,
-            False,
-            False,
-            True,
-            marks=[pytest.mark.world_size(4),
-                   pytest.mark.xfail(reason='Known issue, waiting for composer bump')],
-        ),
-        pytest.param(
-            4,
-            'adam',
-            False,
-            'amp_fp16',
-            False,
-            False,
-            False,
-            False,
-            True,
-            marks=[pytest.mark.world_size(4),
-                   pytest.mark.xfail(reason='Known issue, waiting for composer bump')],
-        ),
-        pytest.param(
-            4,
-            'adam',
-            False,
-            'amp_bf16',
-            True,
-            True,
-            False,
-            False,
-            True,
-            marks=[pytest.mark.world_size(4),
-                   pytest.mark.xfail(reason='Known issue, waiting for composer bump')],
-        ),  # save_weights_only requires load_weights_only
-        pytest.param(
-            4,
-            'adam',
-            False,
-            'amp_bf16',
-            False,
-            True,
-            False,
-            False,
-            True,
-            marks=[pytest.mark.world_size(4),
-                   pytest.mark.xfail(reason='Known issue, waiting for composer bump')],
-        ),
+        pytest.param(4, 'adam', False, 'amp_bf16', False, False, False, False, True, marks=pytest.mark.world_size(4)),
+        pytest.param(4, 'adamw', False, 'amp_bf16', False, False, False, False, True, marks=pytest.mark.world_size(4)),
+        pytest.param(4, 'adam', True, 'amp_bf16', False, False, False, False, True, marks=pytest.mark.world_size(4)),
+        pytest.param(4, 'adam', False, 'amp_fp16', False, False, False, False, True, marks=pytest.mark.world_size(4)),
+        pytest.param(4, 'adam', False, 'amp_bf16', True, True, False, False, True,
+                     marks=pytest.mark.world_size(4)),  # save_weights_only requires load_weights_only
+        pytest.param(4, 'adam', False, 'amp_bf16', False, True, False, False, True, marks=pytest.mark.world_size(4)),
     ],
 )
 def test_fsdp_full_state_dict_load(
@@ -392,6 +321,8 @@ def test_fsdp_full_state_dict_load(
     use_tp: bool,
     use_hsdp: bool,
 ):
+    if use_hsdp:
+        pytest.xfail('Known Pytorch issue with HSDP, waiting for pytorch patch')
     if autoresume:
         run_name = 'my-cool-autoresume-run'
     else: