NVIDIA
diff --git a/‎examples/model_api/README.md
-67 b/‎examples/model_api/README.md
-67
diff --git a/‎examples/model_api/llama.py
-67 b/‎examples/model_api/llama.py
-67
diff --git a/‎examples/model_api/llama_multi_gpu.py
-97 b/‎examples/model_api/llama_multi_gpu.py
-97
diff --git a/‎examples/model_api/llama_quantize.py
-77 b/‎examples/model_api/llama_quantize.py
-77
diff --git a/‎tests/integration/defs/test_e2e.py
-27 b/‎tests/integration/defs/test_e2e.py
-27
diff --git a/‎tests/integration/test_lists/test-db/l0_a10.yml
-1 b/‎tests/integration/test_lists/test-db/l0_a10.yml
-1
diff --git a/‎tests/integration/test_lists/test-db/l0_h100.yml
-1 b/‎tests/integration/test_lists/test-db/l0_h100.yml
-1
diff --git a/‎tests/integration/test_lists/waives.txt
-3 b/‎tests/integration/test_lists/waives.txt
-3
@@ -1223,22 +1223,6 @@ def test_openai_multinodes_chat_tp8pp2(llm_root, llm_venv):
     ])
 
 
-@pytest.mark.skip_less_device_memory(40000)
-def test_model_api_examples(llm_root, llm_venv, engine_dir):
-    example_root = Path(llm_root) / "examples" / "model_api"
-    llm_venv.run_cmd([
-        str(example_root / "llama.py"), "--engine_dir",
-        f"{engine_dir}/model_api", "--hf_model_dir",
-        f"{llm_models_root()}/llama-models/llama-7b-hf"
-    ])
-
-    llm_venv.run_cmd([
-        str(example_root / "llama_quantize.py"), "--cache_dir",
-        f"{engine_dir}/model_api_quantized", "--hf_model_dir",
-        f"{llm_models_root()}/llama-models/llama-7b-hf"
-    ])
-
-
 def test_build_time_benchmark_sanity(llm_root, llm_venv):
     temp = tempfile.TemporaryDirectory()
     llm_venv.run_cmd([
@@ -1248,17 +1232,6 @@ def test_build_time_benchmark_sanity(llm_root, llm_venv):
     ])
 
 
-@pytest.mark.skip_less_device(2)
-@pytest.mark.skip_less_device_memory(40000)
-def test_model_api_examples_tp2(llm_root, llm_venv, engine_dir):
-    example_root = Path(llm_root) / "examples" / "model_api"
-    llm_venv.run_cmd([
-        str(example_root / "llama_multi_gpu.py"), "--engine_dir",
-        f"{engine_dir}/model_api_tp2", "--hf_model_dir",
-        f"{llm_models_root()}/llama-models/llama-7b-hf"
-    ])
-
-
 ### LLMAPI examples
 def _run_llmapi_example(llm_root, engine_dir, llm_venv, script_name: str,
                         *args):
 
@@ -125,7 +125,6 @@ l0_a10:
   - test_e2e.py::test_benchmark_sanity[t5_base]
   - examples/test_openai.py::test_llm_openai_triton_1gpu
   - examples/test_openai.py::test_llm_openai_triton_plugingen_1gpu
-  - test_e2e.py::test_model_api_examples # check with Tao can we remove it
   - test_e2e.py::test_build_time_benchmark_sanity
   - examples/test_whisper.py::test_llm_whisper_general[large-v3-enable_gemm_plugin-enable_attention_plugin-disable_weight_only-float16-nb:1-use_python_runtime]
   - examples/test_whisper.py::test_llm_whisper_general[large-v3-disable_gemm_plugin-enable_attention_plugin-disable_weight_only-float16-nb:1-use_python_runtime] # 4 mins
@@ -152,5 +152,4 @@ l0_h100:
   - test_e2e.py::test_benchmark_sanity_enable_fp8[llama_7b] # 55.77s H100 only
   - test_e2e.py::test_benchmark_sanity_enable_fp8[gpt_350m] # 34.07s H100 only
   - unittest/bindings # 8 mins on H100
-  - test_e2e.py::test_model_api_examples
   - test_e2e.py::test_build_time_benchmark_sanity
@@ -62,7 +62,6 @@ examples/test_multimodal.py::test_llm_multimodal_general[video-neva-pp:1-tp:1-bf
 examples/test_gpt.py::test_llm_gpt2_next_prompt_tuning[use_cpp_session-tp1] SKIP (http://nvbugs/4985405)
 examples/test_gpt.py::test_llm_gpt2_next_prompt_tuning[use_py_session-tp1] SKIP (http://nvbugs/4985405)
 examples/test_gpt.py::test_llm_gpt2_next_prompt_tuning[use_py_session-tp2] SKIP (http://nvbugs/4985405)
-test_e2e.py::test_model_api_examples_tp2 SKIP (executor.py MGMN is broken, https://nvbugs/200490052)
 examples/test_gemma.py::test_llm_gemma_1gpu_summary[gemma-7b-it-flax-int8_kv_cache-bfloat16-8] SKIP (https://nvbugs/4979772)
 examples/test_nemotron.py::test_llm_nemotron_3_8b_1gpu[bfloat16-full_prec] SKIP (https://nvbugs/5000026)
 examples/test_nemotron.py::test_llm_nemotron_3_8b_1gpu[bfloat16-int4_awq] SKIP (https://nvbugs/5000026)
@@ -129,7 +128,6 @@ full:B200_PCIe/unittest/quantization SKIP (Disable for Blackwell)
 full:B200_PCIe/accuracy/test_accuracy.py::TestVicuna7B::test_medusa[] SKIP (Disable for Blackwell)
 full:B200_PCIe/accuracy/test_accuracy.py::TestVicuna7B::test_medusa[cuda_graph] SKIP (Disable for Blackwell)
 full:B200_PCIe/accuracy/test_accuracy.py::TestVicuna7B::test_lookahead SKIP (Disable for Blackwell)
-full:B200_PCIe/test_e2e.py::test_model_api_examples SKIP (Disable for Blackwell)
 full:B200_PCIe/unittest/attention/test_bert_attention.py SKIP (Disable for Blackwell)
 full:B200_PCIe/unittest/model/test_mamba.py SKIP (Disable for Blackwell)
 full:B200_PCIe/examples/test_phi.py::test_llm_phi_single_gpu_summary[Phi-3-mini-128k-instruct-bfloat16-enable_gemm_plugin-enable_attention_plugin-enable_fmha_with_fp32_acc-nb:1] SKIP (Disable for Blackwell)
@@ -246,7 +244,6 @@ full:B200/unittest/quantization SKIP (Disable for Blackwell)
 full:B200/accuracy/test_accuracy.py::TestVicuna7B::test_medusa[] SKIP (Disable for Blackwell)
 full:B200/accuracy/test_accuracy.py::TestVicuna7B::test_medusa[cuda_graph] SKIP (Disable for Blackwell)
 full:B200/accuracy/test_accuracy.py::TestVicuna7B::test_lookahead SKIP (Disable for Blackwell)
-full:B200/test_e2e.py::test_model_api_examples SKIP (Disable for Blackwell)
 full:B200/unittest/attention/test_bert_attention.py SKIP (Disable for Blackwell)
 full:B200/unittest/model/test_mamba.py SKIP (Disable for Blackwell)
 full:B200/examples/test_phi.py::test_llm_phi_single_gpu_summary[Phi-3-mini-128k-instruct-bfloat16-enable_gemm_plugin-enable_attention_plugin-enable_fmha_with_fp32_acc-nb:1] SKIP (Disable for Blackwell)