Merge branch 'moe-tests' into 'main'

ko3n1g · ko3n1g · commit 0dea9a5344d3 · 2025-06-12T04:41:05.000-07:00
MoE inference functional tests

See merge request ADLR/megatron-lm!3403
diff --git a/tests/functional_tests/test_cases/moe/gpt_inference_tp1_pp1_ep1_16B_logitsmatch/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt_inference_tp1_pp1_ep1_16B_logitsmatch/golden_values_dev_dgx_h100.json
@@ -0,0 +1 @@
+"{\"id\": \"0\", \"input_prompt\": \"Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.\", \"generated_text\": \" Wait for the moment when the music stops, and the lights come up, and the DJ says, \\\"I'm going to play a song for you\", \"generated_tokens\": [32844, 1394, 1278, 4735, 2200, 1278, 7146, 30774, 1044, 1321, 1278, 26466, 3930, 2015, 1044, 1321, 1278, 30245, 8223, 1044, 1429, 1073, 4525, 4670, 1317, 3354, 1261, 6947, 1394, 1636], \"latency\": 6.023535869084299, \"logprobs\": [-9.965213775634766, -3.6972405910491943, -2.8163998126983643, -1.3259482383728027, -0.22894315421581268, -1.801922082901001, -2.380244493484497, -1.9902539253234863, -2.195096731185913, -6.201530456542969, -0.8732167482376099, -2.3890693187713623, -3.4655370712280273, -4.265195369720459, -1.9843286275863647, -1.8525164127349854, -2.247467517852783, -7.156258583068848, -0.04102461040019989, -1.9811111688613892, -5.029601097106934, -8.902811050415039, -9.822186470031738, -0.7156577706336975, -4.822559833526611, -0.830146074295044, -2.264935255050659, -0.02063065394759178, -0.0366678312420845, -3.4783172607421875, -8.650375366210938, -1.247912883758545, -6.612592697143555, -3.64731502532959, -3.6577675342559814, -4.237436771392822, -2.1768712997436523, -1.0792245864868164, -0.22580334544181824, -0.7873495221138, -4.81827974319458, -8.96638011932373, -0.01367227640002966, -3.1769614219665527, -1.3207263946533203, -3.995314121246338, -0.7868635654449463, -0.0021346656139940023, -2.9099419116973877, -10.611204147338867, -3.244929313659668, -1.103176474571228, -4.869075775146484, -0.2279863953590393, -0.06238075718283653, -1.2982008457183838, -2.208366632461548, -4.412147045135498, 
-0.3588172495365143, -4.0025200843811035, -0.3714170753955841, -0.14747798442840576, -2.7178127765655518, -10.553118705749512, -0.057451825588941574, -3.381279945373535, -0.8944476842880249, -4.724348068237305, -0.25962480902671814, -2.655942678451538, -0.8473785519599915, -1.5853822231292725, -5.768069267272949, -16.949235916137695, -2.675042152404785, -0.12979209423065186, -7.452098369598389, -1.1089909076690674, -2.0911808013916016, -1.5204540491104126, -0.29428866505622864, -5.85228157043457, -0.006600246299058199, -7.733879089355469, -2.7058277130126953, -2.9573605060577393, -3.0196847915649414, -2.450732469558716, -0.3994073271751404, -1.426312804222107, -2.2726848125457764, -0.6103246212005615, -1.3297024965286255, -1.936716914176941, -1.7187526226043701, -0.7779486775398254, -0.5053722858428955, -1.300978660583496, -1.588526964187622, -0.9849303960800171, -0.4031231701374054, -0.4341556429862976, -0.04193130508065224, -1.2715754508972168, -2.116468906402588, -2.6802122592926025, -0.8255553245544434, -0.42921727895736694, -2.904050350189209, -1.4616029262542725, -1.6294372081756592, -0.05650198459625244, -1.3804056644439697, -1.3228214979171753, -1.268000602722168, -1.2933895587921143, -0.5357464551925659]}"
diff --git a/tests/functional_tests/test_cases/moe/gpt_inference_tp1_pp1_ep1_16B_logitsmatch/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt_inference_tp1_pp1_ep1_16B_logitsmatch/model_config.yaml
@@ -0,0 +1,78 @@
+ENV_VARS:  # environment settings for the gpt_inference_tp1_pp1_ep1_16B_logitsmatch test case
+  CUDA_DEVICE_MAX_CONNECTIONS: 1
+  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0  # set alongside --deterministic-mode below; presumably required for reproducible logprobs - confirm
+  NCCL_ALGO: Ring
+  CUBLAS_WORKSPACE_CONFIG: :4096:8
+TEST_TYPE: frozen-start
+MODE: inference
+MODEL_ARGS:  # presumably forwarded as command-line flags to the script named by TRAINING_SCRIPT_PATH in the recipe - confirm in run_ci_test.sh
+  --log-num-zeros-in-grad: true  # NOTE(review): this and the next four flags look training-oriented; confirm the inference script needs them
+  --log-validation-ppl-to-tensorboard: true
+  --log-timers-to-tensorboard: true
+  --log-memory-to-tensorboard: true
+  --timing-log-level: 2
+  --load: ${CHECKPOINT_LOAD_PATH}/deepseek_16b_pyt/model/checkpoints  # the recipe sets CHECKPOINT_LOAD_PATH=/workspace/data and mounts an artifact at /workspace/data/deepseek_16b_pyt/model
+  --tokenizer-model: ${DATA_PATH}/deepseek_16b_pyt/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json  # the recipe sets DATA_PATH=/workspace/data and mounts an artifact at /workspace/data/deepseek_16b_pyt/tokenizer
+  --tokenizer-type: TikTokenizer
+  --tiktoken-pattern: v2
+  --distributed-backend: nccl
+  --log-interval: 1
+  --transformer-impl: transformer_engine
+  --tensor-model-parallel-size: 1  # tp1 in the test case name
+  --pipeline-model-parallel-size: 1  # pp1 in the test case name
+  --expert-model-parallel-size: 1  # ep1 in the test case name
+  --use-mcore-models: true
+  --moe-token-dispatcher-type: alltoall
+  --moe-grouped-gemm: true
+  --num-experts: 64
+  --moe-router-topk: 6
+  --moe-z-loss-coeff: 0
+  --moe-router-load-balancing-type: seq_aux_loss
+  --moe-aux-loss-coeff: 1e-3
+  --moe-router-score-function: sigmoid
+  --untie-embeddings-and-output-weights: true
+  --disable-bias-linear: true
+  --init-method-std: 0.014
+  --position-embedding-type: rope
+  --rotary-base: 1000000
+  --rotary-percent: 1.0
+  --num-layers: 27
+  --hidden-size: 2048
+  --moe-ffn-hidden-size: 1408
+  --moe-shared-expert-intermediate-size: 2816
+  --ffn-hidden-size: 10944
+  --num-attention-heads: 16
+  --kv-channels: 128
+  --normalization: RMSNorm
+  --swiglu: true
+  --attention-dropout: 0.0
+  --hidden-dropout: 0.0
+  --seq-length: 4096
+  --max-position-embeddings: 4096
+  --micro-batch-size: 1
+  --ckpt-format: torch_dist
+  --ckpt-fully-parallel-save: true
+  --ckpt-fully-parallel-load: true
+  --ckpt-assume-constant-structure: true
+  --dist-ckpt-strictness: log_unexpected
+  --bf16: true
+  --attention-backend: flash
+  --no-create-attention-mask-in-dataloader: true
+  --num-workers: 8
+  --flash-decode: true
+  --use-checkpoint-args: true  # NOTE(review): may let checkpoint-stored args take precedence over the architecture flags above - confirm
+  --no-use-tokenizer-model-from-checkpoint-args: true
+  --no-load-optim: true
+  --deterministic-mode: true
+  --save-interval: 2000
+  --temperature: 1.0
+  --top_k: 1  # presumably greedy decoding so the generated tokens are reproducible - confirm
+  --return-log-probs: true  # logprobs is one of the METRICS listed below
+  --num-tokens-to-generate: 30  # the golden values file lists 30 generated_tokens
+  --max-tokens-to-oom: 3600000
+  --inference-max-seq-length: 4096
+  --output-path: ${TENSORBOARD_PATH}  # the recipe points TENSORBOARD_PATH at a generations.json file under the assets dir
+  --prompts: "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies."
+METRICS:  # both names are keys in this test case's golden values JSON
+  - "generated_tokens"
+  - "logprobs"
diff --git a/tests/test_utils/recipes/moe-inference.yaml b/tests/test_utils/recipes/moe-inference.yaml
@@ -0,0 +1,63 @@
+type: basic  # CI recipe for the MoE inference functional test; the braces in strings below are template placeholders
+format_version: 1
+maintainers: [mcore]
+loggers: [stdout]
+spec:
+  name: "{test_case}_{environment}_{platforms}"
+  model: moe  # fills the model placeholder in the test-case paths used by script
+  build: mcore-pyt-{environment}
+  nodes: 1
+  gpus: 1  # single GPU, consistent with tp1/pp1/ep1 in the test case name
+  n_repeat: 1
+  platforms: dgx_a100  # NOTE(review): the only product below requests dgx_h100 - confirm this default is intended
+  artifacts:
+    /workspace/data/deepseek_16b_pyt/model: model/deepseek_16b_pyt/dcp/mcore-v1_bf16
+    /workspace/data/deepseek_16b_pyt/tokenizer: model/deepseek_16b_pyt/dcp/mcore-v1_bf16  # NOTE(review): same source as the model mount - confirm it also contains the tokenizer vocab file
+  script_setup: |
+    echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" >> /root/.netrc  # plain append: tee would also print the token to the job log
+
+    # Check out the commit under test into /opt/megatron-lm
+    cd /opt
+    rm -rf /opt/megatron-lm; mkdir megatron-lm; cd megatron-lm
+    git init
+    git remote add origin $MCORE_REPO
+    git fetch origin '+refs/merge-requests/*:refs/remotes/merge-requests/*'
+    git fetch origin $MCORE_MR_COMMIT
+    git checkout $MCORE_MR_COMMIT
+    git rev-parse HEAD
+    # Check out the backwards reference commit into /opt/megatron-lm-legacy
+    cd /opt
+    rm -rf /opt/megatron-lm-legacy; mkdir megatron-lm-legacy; cd megatron-lm-legacy
+    git init
+    git remote add origin $MCORE_REPO
+    git fetch origin $MCORE_BACKWARDS_COMMIT
+    git checkout $MCORE_BACKWARDS_COMMIT
+    git rev-parse HEAD
+    rm -rf megatron; cp -a /opt/megatron-lm/megatron ./  # replace its megatron package with the one from the commit under test
+  script: |-
+    ls
+    cd /opt/megatron-lm
+
+    ARGUMENTS=(
+        "CHECKPOINT_LOAD_PATH=/workspace/data"
+        "CHECKPOINT_SAVE_PATH=/tmp/checkpoints"
+        "DATA_PATH=/workspace/data"
+        "DATA_CACHE_PATH=/workspace/data/cache"
+        "TRAINING_SCRIPT_PATH=examples/inference/gpt/gpt_static_inference.py"
+        "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml"
+        "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_{platforms}.json"
+        "OUTPUT_PATH={assets_dir}"
+        "TENSORBOARD_PATH={assets_dir}/generations.json"
+        "N_REPEAT={n_repeat}"
+        "ENABLE_LIGHTWEIGHT_MODE=false"
+        "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}"
+    )
+
+    bash ./tests/functional_tests/shell_test_utils/run_ci_test.sh "${{ARGUMENTS[@]}}"  # quoted so each KEY=VALUE entry stays one argument
+
+products:
+  - test_case: [gpt_inference_tp1_pp1_ep1_16B_logitsmatch]
+    products:
+      - environment: [dev]
+        scope: [mr]
+        platforms: [dgx_h100]  # matches the committed golden_values_dev_dgx_h100.json

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	+"{\"id\": \"0\", \"input_prompt\": \"Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.\", \"generated_text\": \" Wait for the moment when the music stops, and the lights come up, and the DJ says, \\\"I'm going to play a song for you\", \"generated_tokens\": [32844, 1394, 1278, 4735, 2200, 1278, 7146, 30774, 1044, 1321, 1278, 26466, 3930, 2015, 1044, 1321, 1278, 30245, 8223, 1044, 1429, 1073, 4525, 4670, 1317, 3354, 1261, 6947, 1394, 1636], \"latency\": 6.023535869084299, \"logprobs\": [-9.965213775634766, -3.6972405910491943, -2.8163998126983643, -1.3259482383728027, -0.22894315421581268, -1.801922082901001, -2.380244493484497, -1.9902539253234863, -2.195096731185913, -6.201530456542969, -0.8732167482376099, -2.3890693187713623, -3.4655370712280273, -4.265195369720459, -1.9843286275863647, -1.8525164127349854, -2.247467517852783, -7.156258583068848, -0.04102461040019989, -1.9811111688613892, -5.029601097106934, -8.902811050415039, -9.822186470031738, -0.7156577706336975, -4.822559833526611, -0.830146074295044, -2.264935255050659, -0.02063065394759178, -0.0366678312420845, -3.4783172607421875, -8.650375366210938, -1.247912883758545, -6.612592697143555, -3.64731502532959, -3.6577675342559814, -4.237436771392822, -2.1768712997436523, -1.0792245864868164, -0.22580334544181824, -0.7873495221138, -4.81827974319458, -8.96638011932373, -0.01367227640002966, -3.1769614219665527, -1.3207263946533203, -3.995314121246338, -0.7868635654449463, -0.0021346656139940023, -2.9099419116973877, -10.611204147338867, -3.244929313659668, -1.103176474571228, -4.869075775146484, -0.2279863953590393, -0.06238075718283653, -1.2982008457183838, -2.208366632461548, -4.412147045135498, 
-0.3588172495365143, -4.0025200843811035, -0.3714170753955841, -0.14747798442840576, -2.7178127765655518, -10.553118705749512, -0.057451825588941574, -3.381279945373535, -0.8944476842880249, -4.724348068237305, -0.25962480902671814, -2.655942678451538, -0.8473785519599915, -1.5853822231292725, -5.768069267272949, -16.949235916137695, -2.675042152404785, -0.12979209423065186, -7.452098369598389, -1.1089909076690674, -2.0911808013916016, -1.5204540491104126, -0.29428866505622864, -5.85228157043457, -0.006600246299058199, -7.733879089355469, -2.7058277130126953, -2.9573605060577393, -3.0196847915649414, -2.450732469558716, -0.3994073271751404, -1.426312804222107, -2.2726848125457764, -0.6103246212005615, -1.3297024965286255, -1.936716914176941, -1.7187526226043701, -0.7779486775398254, -0.5053722858428955, -1.300978660583496, -1.588526964187622, -0.9849303960800171, -0.4031231701374054, -0.4341556429862976, -0.04193130508065224, -1.2715754508972168, -2.116468906402588, -2.6802122592926025, -0.8255553245544434, -0.42921727895736694, -2.904050350189209, -1.4616029262542725, -1.6294372081756592, -0.05650198459625244, -1.3804056644439697, -1.3228214979171753, -1.268000602722168, -1.2933895587921143, -0.5357464551925659]}"