Commit 171c351

guyueh1 authored and ko3n1g committed
ADLR/megatron-lm!3480 - Fix unit test test_fp8_param.py blockwise scaling
1 parent c3dc507 · commit 171c351

File tree

1 file changed: +8 -2


tests/unit_tests/test_fp8_param.py

Lines changed: 8 additions & 2 deletions
@@ -32,7 +32,7 @@
 class TestFP8Param:

     def setup_method(self, method):
-        self.seq_length = 32
+        self.seq_length = 512
         self.micro_batch_size = 2
         os.environ['CUDA_DEVICE_MAX_CONNECTIONS'] = '1'

@@ -76,7 +76,7 @@ def create_test_args(self, tp, recipe, sequence_length, micro_batch_size, **kwargs):
         args.vocal_size = 128800
         args.hidden_size = 128
         args.num_attention_heads = 8
-        args.max_position_embeddings = 256
+        args.max_position_embeddings = 512
         args.micro_batch_size = micro_batch_size
         args.create_attention_mask_in_dataloader = True
         args.seq_length = sequence_length
@@ -122,6 +122,12 @@ def run_test(self, tp_size, recipe, **kwargs):
         args = self.create_test_args(
             tp_size, recipe, self.seq_length, self.micro_batch_size, **kwargs
         )
+
+        if recipe == "blockwise" and args.sequence_parallel:
+            assert (
+                tp_size * 128 <= self.seq_length
+            ), "Blockwise recipe and sequence parallelism requires tp_size * 128 <= seq_length"
+
         set_args(args)
         torch.manual_seed(_SEED)
         Utils.initialize_model_parallel(tensor_model_parallel_size=tp_size)
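
Context for the new assert (an editorial note, not part of the commit): blockwise FP8 scaling quantizes in 128-element blocks, and sequence parallelism splits the sequence dimension across tp_size ranks, so each rank's shard presumably has to cover at least one full block. Below is a minimal sketch of that arithmetic; the helper name is hypothetical, and the block size of 128 is taken from the assert message.

```python
# Minimal sketch (not from the commit): why the test requires
# tp_size * 128 <= seq_length for the blockwise recipe with
# sequence parallelism.
# Assumption: the sequence dimension is split evenly across tp_size
# ranks, and each rank's shard must span at least one 128-element block.

BLOCK = 128  # blockwise scaling block size, per the assert message

def blockwise_sp_compatible(tp_size: int, seq_length: int) -> bool:
    """Hypothetical helper: True if each sequence-parallel shard covers >= one block."""
    shard = seq_length // tp_size  # tokens each rank holds under sequence parallelism
    return shard >= BLOCK          # equivalent to tp_size * BLOCK <= seq_length

assert not blockwise_sp_compatible(tp_size=4, seq_length=32)   # old setting: 8-token shards
assert blockwise_sp_compatible(tp_size=4, seq_length=512)      # new setting: 128-token shards
```

Under that reading, bumping seq_length from 32 to 512 (with max_position_embeddings raised to match) makes the bound satisfiable at larger tensor-parallel sizes: 512 = 4 * 128.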
