Skip to content

Commit 0dea9a5

Browse files
committed
Merge branch 'moe-tests' into 'main'
Moe inference functional tests. See merge request ADLR/megatron-lm!3403
2 parents 0595ef2 + 9e5fe7a commit 0dea9a5

File tree

3 files changed

+142
-0
lines changed

3 files changed

+142
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"{\"id\": \"0\", \"input_prompt\": \"Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.\", \"generated_text\": \" Wait for the moment when the music stops, and the lights come up, and the DJ says, \\\"I'm going to play a song for you\", \"generated_tokens\": [32844, 1394, 1278, 4735, 2200, 1278, 7146, 30774, 1044, 1321, 1278, 26466, 3930, 2015, 1044, 1321, 1278, 30245, 8223, 1044, 1429, 1073, 4525, 4670, 1317, 3354, 1261, 6947, 1394, 1636], \"latency\": 6.023535869084299, \"logprobs\": [-9.965213775634766, -3.6972405910491943, -2.8163998126983643, -1.3259482383728027, -0.22894315421581268, -1.801922082901001, -2.380244493484497, -1.9902539253234863, -2.195096731185913, -6.201530456542969, -0.8732167482376099, -2.3890693187713623, -3.4655370712280273, -4.265195369720459, -1.9843286275863647, -1.8525164127349854, -2.247467517852783, -7.156258583068848, -0.04102461040019989, -1.9811111688613892, -5.029601097106934, -8.902811050415039, -9.822186470031738, -0.7156577706336975, -4.822559833526611, -0.830146074295044, -2.264935255050659, -0.02063065394759178, -0.0366678312420845, -3.4783172607421875, -8.650375366210938, -1.247912883758545, -6.612592697143555, -3.64731502532959, -3.6577675342559814, -4.237436771392822, -2.1768712997436523, -1.0792245864868164, -0.22580334544181824, -0.7873495221138, -4.81827974319458, -8.96638011932373, -0.01367227640002966, -3.1769614219665527, -1.3207263946533203, -3.995314121246338, -0.7868635654449463, -0.0021346656139940023, -2.9099419116973877, -10.611204147338867, -3.244929313659668, -1.103176474571228, -4.869075775146484, -0.2279863953590393, -0.06238075718283653, -1.2982008457183838, -2.208366632461548, -4.412147045135498, 
-0.3588172495365143, -4.0025200843811035, -0.3714170753955841, -0.14747798442840576, -2.7178127765655518, -10.553118705749512, -0.057451825588941574, -3.381279945373535, -0.8944476842880249, -4.724348068237305, -0.25962480902671814, -2.655942678451538, -0.8473785519599915, -1.5853822231292725, -5.768069267272949, -16.949235916137695, -2.675042152404785, -0.12979209423065186, -7.452098369598389, -1.1089909076690674, -2.0911808013916016, -1.5204540491104126, -0.29428866505622864, -5.85228157043457, -0.006600246299058199, -7.733879089355469, -2.7058277130126953, -2.9573605060577393, -3.0196847915649414, -2.450732469558716, -0.3994073271751404, -1.426312804222107, -2.2726848125457764, -0.6103246212005615, -1.3297024965286255, -1.936716914176941, -1.7187526226043701, -0.7779486775398254, -0.5053722858428955, -1.300978660583496, -1.588526964187622, -0.9849303960800171, -0.4031231701374054, -0.4341556429862976, -0.04193130508065224, -1.2715754508972168, -2.116468906402588, -2.6802122592926025, -0.8255553245544434, -0.42921727895736694, -2.904050350189209, -1.4616029262542725, -1.6294372081756592, -0.05650198459625244, -1.3804056644439697, -1.3228214979171753, -1.268000602722168, -1.2933895587921143, -0.5357464551925659]}"
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
ENV_VARS:
2+
CUDA_DEVICE_MAX_CONNECTIONS: 1
3+
NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
4+
NCCL_ALGO: Ring
5+
CUBLAS_WORKSPACE_CONFIG: :4096:8
6+
TEST_TYPE: frozen-start
7+
MODE: inference
8+
MODEL_ARGS:
9+
--log-num-zeros-in-grad: true
10+
--log-validation-ppl-to-tensorboard: true
11+
--log-timers-to-tensorboard: true
12+
--log-memory-to-tensorboard: true
13+
--timing-log-level: 2
14+
--load: ${CHECKPOINT_LOAD_PATH}/deepseek_16b_pyt/model/checkpoints
15+
--tokenizer-model: ${DATA_PATH}/deepseek_16b_pyt/tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json
16+
--tokenizer-type: TikTokenizer
17+
--tiktoken-pattern: v2
18+
--distributed-backend: nccl
19+
--log-interval: 1
20+
--transformer-impl: transformer_engine
21+
--tensor-model-parallel-size: 1
22+
--pipeline-model-parallel-size: 1
23+
--expert-model-parallel-size: 1
24+
--use-mcore-models: true
25+
--moe-token-dispatcher-type: alltoall
26+
--moe-grouped-gemm: true
27+
--num-experts: 64
28+
--moe-router-topk: 6
29+
--moe-z-loss-coeff: 0
30+
--moe-router-load-balancing-type: seq_aux_loss
31+
--moe-aux-loss-coeff: 1e-3
32+
--moe-router-score-function: sigmoid
33+
--untie-embeddings-and-output-weights: true
34+
--disable-bias-linear: true
35+
--init-method-std: 0.014
36+
--position-embedding-type: rope
37+
--rotary-base: 1000000
38+
--rotary-percent: 1.0
39+
--num-layers: 27
40+
--hidden-size: 2048
41+
--moe-ffn-hidden-size: 1408
42+
--moe-shared-expert-intermediate-size: 2816
43+
--ffn-hidden-size: 10944
44+
--num-attention-heads: 16
45+
--kv-channels: 128
46+
--normalization: RMSNorm
47+
--swiglu: true
48+
--attention-dropout: 0.0
49+
--hidden-dropout: 0.0
50+
--seq-length: 4096
51+
--max-position-embeddings: 4096
52+
--micro-batch-size: 1
53+
--ckpt-format: torch_dist
54+
--ckpt-fully-parallel-save: true
55+
--ckpt-fully-parallel-load: true
56+
--ckpt-assume-constant-structure: true
57+
--dist-ckpt-strictness: log_unexpected
58+
--bf16: true
59+
--attention-backend: flash
60+
--no-create-attention-mask-in-dataloader: true
61+
--num-workers: 8
62+
--flash-decode: true
63+
--use-checkpoint-args: true
64+
--no-use-tokenizer-model-from-checkpoint-args: true
65+
--no-load-optim: true
66+
--deterministic-mode: true
67+
--save-interval: 2000
68+
--temperature: 1.0
69+
--top_k: 1
70+
--return-log-probs: true
71+
--num-tokens-to-generate: 30
72+
--max-tokens-to-oom: 3600000
73+
--inference-max-seq-length: 4096
74+
--output-path: ${TENSORBOARD_PATH}
75+
--prompts: "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies."
76+
METRICS:
77+
- "generated_tokens"
78+
- "logprobs"
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
type: basic
2+
format_version: 1
3+
maintainers: [mcore]
4+
loggers: [stdout]
5+
spec:
6+
name: "{test_case}_{environment}_{platforms}"
7+
model: moe
8+
build: mcore-pyt-{environment}
9+
nodes: 1
10+
gpus: 1
11+
n_repeat: 1
12+
platforms: dgx_a100
13+
artifacts:
14+
/workspace/data/deepseek_16b_pyt/model: model/deepseek_16b_pyt/dcp/mcore-v1_bf16
15+
/workspace/data/deepseek_16b_pyt/tokenizer: model/deepseek_16b_pyt/dcp/mcore-v1_bf16
16+
script_setup: |
17+
# Append the read-only credentials to .netrc with a plain redirect; piping through tee would also print the token to the job log.
echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" >> /root/.netrc
18+
19+
# Checkout latest
20+
cd /opt
21+
rm -rf /opt/megatron-lm; mkdir megatron-lm; cd megatron-lm
22+
git init
23+
git remote add origin $MCORE_REPO
24+
git fetch origin '+refs/merge-requests/*:refs/remotes/merge-requests/*'
25+
git fetch origin $MCORE_MR_COMMIT
26+
git checkout $MCORE_MR_COMMIT
27+
git rev-parse HEAD
28+
# Checkout backwards-ref
29+
cd /opt
30+
rm -rf /opt/megatron-lm-legacy; mkdir megatron-lm-legacy; cd megatron-lm-legacy
31+
git init
32+
git remote add origin $MCORE_REPO
33+
git fetch origin $MCORE_BACKWARDS_COMMIT
34+
git checkout $MCORE_BACKWARDS_COMMIT
35+
git rev-parse HEAD
36+
rm -rf megatron; cp -a /opt/megatron-lm/megatron ./
37+
script: |-
38+
ls
39+
cd /opt/megatron-lm
40+
41+
ARGUMENTS=(
42+
"CHECKPOINT_LOAD_PATH=/workspace/data"
43+
"CHECKPOINT_SAVE_PATH=/tmp/checkpoints"
44+
"DATA_PATH=/workspace/data"
45+
"DATA_CACHE_PATH=/workspace/data/cache"
46+
"TRAINING_SCRIPT_PATH=examples/inference/gpt/gpt_static_inference.py"
47+
"TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml"
48+
"GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_{platforms}.json"
49+
"OUTPUT_PATH={assets_dir}"
50+
"TENSORBOARD_PATH={assets_dir}/generations.json"
51+
"N_REPEAT={n_repeat}"
52+
"ENABLE_LIGHTWEIGHT_MODE=false"
53+
"RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}"
54+
)
55+
56+
# Quote the array expansion so every KEY=VALUE entry is passed as exactly one argument, even if a value contains whitespace or glob characters.
bash ./tests/functional_tests/shell_test_utils/run_ci_test.sh "${{ARGUMENTS[@]}}"
57+
58+
products:
59+
- test_case: [gpt_inference_tp1_pp1_ep1_16B_logitsmatch]
60+
products:
61+
- environment: [dev]
62+
scope: [mr]
63+
platforms: [dgx_h100]

0 commit comments

Comments
 (0)