# Triton benchmarks workflow (tdesc_updates, PR #2149)
name: Triton benchmarks
# run-name comes from the workflow_dispatch input; empty on pull_request runs.
run-name: ${{ inputs.run_name }}
on:
  workflow_dispatch:
    inputs:
      runner_label:
        description: Runner label, keep empty for default
        type: string
        default: ""
      tag:
        description: Tag for benchmark results
        type: string
        default: "test"
      benchmarking_method:
        description: The method used to obtain performance numbers
        type: choice
        options:
          - ELAPSED_TIME
          - UPSTREAM_PYTORCH_PROFILER
        default: UPSTREAM_PYTORCH_PROFILER
      verify:
        description: Verify the benchmark results
        type: boolean
        default: true
      run_name:
        description: Run name
        type: string
        default: "Triton benchmarks"
      n_runs:
        description: Number of runs for each benchmark
        type: number
        default: 1
      benchmarks:
        description: JSON list of benchmarks to run. Leave empty to run all benchmarks.
        type: string
        default: ""
      skip_benchmarks:
        description: JSON list of benchmarks to skip
        type: string
        default: "[]"
      use_pyenv_python:
        description: Use Python built with pyenv
        type: boolean
        default: false
  pull_request:
    branches:
      - main
    paths:
      - .github/workflows/triton-benchmarks.yml
      - benchmarks/**
# Cancels in-progress PR runs when the PR is updated. Manual runs are never cancelled.
# PRs labelled 'keep-going' also opt out of cancellation by using the unique run_id as group.
concurrency:
  group: ${{ github.workflow }}-${{ github.event_name == 'workflow_dispatch' && github.run_id || github.event_name == 'pull_request' && (contains(github.event.pull_request.labels.*.name, 'keep-going') && github.run_id || github.event.pull_request.number) || github.ref }}
  cancel-in-progress: true

permissions: read-all
env:
  # Quoted so YAML does not parse 3.10 as the float 3.1.
  PYTHON_VERSION: "3.10"
  BENCHMARKING_METHOD: ${{ inputs.benchmarking_method || 'UPSTREAM_PYTORCH_PROFILER' }}
  # Verification is forced on for PR runs; manual runs honor the 'verify' input.
  VERIFY: ${{ (github.event_name == 'pull_request' || github.event_name == 'schedule' || inputs.verify) && '1' || '0' }}
  TAG: ${{ inputs.tag || (github.event_name == 'pull_request' && format('pr-{0}', github.event.number)) || (github.event_name == 'schedule' && 'ci') || 'test' }}
  N_RUNS: ${{ inputs.n_runs || '1' }}
jobs:
  build:
    name: Triton benchmarks
    runs-on:
      - linux
      - ${{ inputs.runner_label || 'max1550' }}
    timeout-minutes: 720
    defaults:
      run:
        # Source the oneAPI environment before every step's script.
        # NOTE(review): original had single-dash '-noprofile'; bash long options
        # are '--noprofile'/'--norc' — corrected here.
        shell: bash --noprofile --norc -eo pipefail -c "source /opt/intel/oneapi/setvars.sh > /dev/null; source {0}"
    steps:
      - name: Print inputs
        run: |
          cat <<EOF
          ${{ toJSON(inputs) }}
          EOF

      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Install Python
        if: ${{ !(inputs.use_pyenv_python || false) }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      # Fixed: step title referenced 'inputs.python_version', which is not a
      # declared workflow input (it rendered empty); use env.PYTHON_VERSION,
      # which is what the step actually installs.
      - name: Install Python (from pyenv) ${{ env.PYTHON_VERSION }}
        if: ${{ inputs.use_pyenv_python }}
        uses: ./.github/actions/setup-pyenv-python
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Identify Python version
        run: |
          PYTHON_VERSION="$(python -c 'import sys; print(f"{sys.version_info[0]}.{sys.version_info[1]}")')"
          echo "PYTHON_VERSION=$PYTHON_VERSION" | tee -a $GITHUB_ENV

      - name: Install Python build dependencies
        run: |
          pip install wheel cmake

      - name: Setup PyTorch
        uses: ./.github/actions/setup-pytorch

      - name: Setup Triton
        uses: ./.github/actions/setup-triton

      - name: Create reports dir
        run: |
          mkdir reports
          echo "REPORTS=$PWD/reports" >> $GITHUB_ENV

      - name: Install benchmarks
        id: install
        run: |
          cd benchmarks
          pip install .

      # Each benchmark step below runs when: install succeeded, the job was not
      # cancelled, the benchmark is selected (or the selection list is empty),
      # and it is not in the skip list.
      - name: Run Triton Softmax kernel benchmark
        if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'fused_softmax.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'fused_softmax.py') }}
        run: |
          cd benchmarks/triton_kernels_benchmark
          python fused_softmax.py --reports $REPORTS --n_runs $N_RUNS
          source ../../scripts/capture-hw-details.sh
          python build_report.py $REPORTS/softmax-performance.csv $REPORTS/softmax-triton-report.csv --benchmark softmax --compiler triton --param_cols "N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
          python build_report.py $REPORTS/softmax-performance.csv $REPORTS/softmax-xetla-report.csv --benchmark softmax --compiler xetla --param_cols "N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG

      - name: Run Triton GEMM kernel benchmark
        if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py') }}
        run: |
          cd benchmarks/triton_kernels_benchmark
          NEW_SHAPES=0 python gemm_benchmark.py --reports $REPORTS --n_runs $N_RUNS
          source ../../scripts/capture-hw-details.sh
          python build_report.py $REPORTS/matmul-performance.csv $REPORTS/gemm-triton-report.csv --benchmark gemm-legacy --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
          python build_report.py $REPORTS/matmul-performance.csv $REPORTS/gemm-xetla-report.csv --benchmark gemm-legacy --compiler xetla --param_cols "B,M,K,N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
          python build_report.py $REPORTS/matmul-performance.csv $REPORTS/gemm-onednn-report.csv --benchmark gemm-legacy --compiler onednn --param_cols "B,M,K,N" --tflops_col OneDNN-TFlops --hbm_col "OneDNN-GB/s" --tag $TAG

      - name: Run Triton GEMM kernel benchmark - new shapes
        if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_benchmark.py_newshapes')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_newshapes') }}
        run: |
          cd benchmarks/triton_kernels_benchmark
          python gemm_benchmark.py --reports $REPORTS --n_runs $N_RUNS
          mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-base.csv
          source ../../scripts/capture-hw-details.sh
          python build_report.py $REPORTS/matmul-performance-base.csv $REPORTS/gemm-newshapes-triton-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
          python build_report.py $REPORTS/matmul-performance-base.csv $REPORTS/gemm-newshapes-onednn-report.csv --benchmark gemm --compiler onednn --param_cols "B,M,K,N" --tflops_col OneDNN-TFlops --hbm_col "OneDNN-GB/s" --tag $TAG

      - name: Run Triton GEMM kernel benchmark - with tensor of pointer
        if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_tensor_of_ptr_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_tensor_of_ptr_benchmark.py') }}
        run: |
          cd benchmarks/triton_kernels_benchmark
          python gemm_tensor_of_ptr_benchmark.py --reports $REPORTS --n_runs $N_RUNS
          source ../../scripts/capture-hw-details.sh
          python build_report.py $REPORTS/matmul-tensor-of-ptr-performance.csv $REPORTS/gemm-tensor-of-ptr-triton-report.csv --benchmark gemm-tensor-of-ptr --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
          python build_report.py $REPORTS/matmul-tensor-of-ptr-performance.csv $REPORTS/gemm-tensor-of-ptr-onednn-report.csv --benchmark gemm-tensor-of-ptr --compiler onednn --param_cols "B,M,K,N" --tflops_col OneDNN-TFlops --hbm_col "OneDNN-GB/s" --tag $TAG

      - name: Run Triton GEMM kernel benchmark - with tensor descriptor
        if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_tensor_desc_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_tensor_desc_benchmark.py') }}
        run: |
          cd benchmarks/triton_kernels_benchmark
          python gemm_tensor_desc_benchmark.py --reports $REPORTS --n_runs $N_RUNS
          source ../../scripts/capture-hw-details.sh
          python build_report.py $REPORTS/matmul-tensor-desc-performance.csv $REPORTS/gemm-tensor-desc-triton-report.csv --benchmark gemm-tensor-desc --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
          python build_report.py $REPORTS/matmul-tensor-desc-performance.csv $REPORTS/gemm-tensor-desc-onednn-report.csv --benchmark gemm-tensor-desc --compiler onednn --param_cols "B,M,K,N" --tflops_col OneDNN-TFlops --hbm_col "OneDNN-GB/s" --tag $TAG

      - name: Run Triton GEMM (A@B^t) kernel benchmark
        if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_benchmark.py_abt')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_abt') }}
        run: |
          cd benchmarks/triton_kernels_benchmark
          TRANSPOSE_B=1 python gemm_benchmark.py --reports $REPORTS --n_runs $N_RUNS
          mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-bt.csv
          source ../../scripts/capture-hw-details.sh
          python build_report.py $REPORTS/matmul-performance-bt.csv $REPORTS/gemm-bt-triton-report.csv --benchmark gemm-bt --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
          python build_report.py $REPORTS/matmul-performance-bt.csv $REPORTS/gemm-bt-onednn-report.csv --benchmark gemm-bt --compiler onednn --param_cols "B,M,K,N" --tflops_col OneDNN-TFlops --hbm_col "OneDNN-GB/s" --tag $TAG

      - name: Run Triton GEMM (A^t@B) kernel benchmark
        if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_benchmark.py_atb')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_atb') }}
        run: |
          cd benchmarks/triton_kernels_benchmark
          TRANSPOSE_A=1 python gemm_benchmark.py --reports $REPORTS --n_runs $N_RUNS
          mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-at.csv
          source ../../scripts/capture-hw-details.sh
          python build_report.py $REPORTS/matmul-performance-at.csv $REPORTS/gemm-at-triton-report.csv --benchmark gemm-at --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
          python build_report.py $REPORTS/matmul-performance-at.csv $REPORTS/gemm-at-onednn-report.csv --benchmark gemm-at --compiler onednn --param_cols "B,M,K,N" --tflops_col OneDNN-TFlops --hbm_col "OneDNN-GB/s" --tag $TAG

      - name: Run Triton GEMM (stream-k) kernel benchmark
        if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_streamk_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_streamk_benchmark.py') }}
        run: |
          cd benchmarks/triton_kernels_benchmark
          python gemm_streamk_benchmark.py --reports $REPORTS --n_runs $N_RUNS
          source ../../scripts/capture-hw-details.sh
          python build_report.py $REPORTS/matmul-streamk-performance.csv $REPORTS/gemm-streamk-triton-report.csv --benchmark gemm-streamk --compiler triton --param_cols "M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
          python build_report.py $REPORTS/matmul-streamk-performance.csv $REPORTS/gemm-streamk-xetla-report.csv --benchmark gemm-streamk --compiler xetla --param_cols "M,K,N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG

      - name: Run Triton GEMM (split-k) kernel benchmark
        if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_splitk_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_splitk_benchmark.py') }}
        run: |
          cd benchmarks/triton_kernels_benchmark
          python gemm_splitk_benchmark.py --reports $REPORTS --n_runs $N_RUNS
          source ../../scripts/capture-hw-details.sh
          python build_report.py $REPORTS/matmul-splitk-performance.csv $REPORTS/gemm-splitk-triton-report.csv --benchmark gemm-splitk --compiler triton --param_cols "M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
          python build_report.py $REPORTS/matmul-splitk-performance.csv $REPORTS/gemm-splitk-xetla-report.csv --benchmark gemm-splitk --compiler xetla --param_cols "M,K,N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG

      - name: Run Triton GEMM + PreOp (exp) kernel benchmark
        if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_preop_exp_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_preop_exp_benchmark.py') }}
        run: |
          cd benchmarks/triton_kernels_benchmark
          python gemm_preop_exp_benchmark.py --reports $REPORTS --n_runs $N_RUNS
          source ../../scripts/capture-hw-details.sh
          python build_report.py $REPORTS/matmul-performance-preop-exp.csv $REPORTS/gemm-preop-exp-triton-report.csv --benchmark gemm-preop-exp --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG

      - name: Run Triton GEMM + PostOp (Gelu) kernel benchmark
        if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_postop_gelu_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_postop_gelu_benchmark.py') }}
        run: |
          cd benchmarks/triton_kernels_benchmark
          python gemm_postop_gelu_benchmark.py --reports $REPORTS --n_runs $N_RUNS
          source ../../scripts/capture-hw-details.sh
          python build_report.py $REPORTS/matmul-performance-postop-gelu.csv $REPORTS/gemm-postop-gelu-triton-report.csv --benchmark gemm-postop-gelu --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG

      - name: Run Triton GEMM + PostOp (add matrix) kernel benchmark bfloat16
        if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_postop_addmatrix_benchmark_bfloat16.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_postop_addmatrix_benchmark_bfloat16.py') }}
        run: |
          cd benchmarks/triton_kernels_benchmark
          python gemm_postop_addmatrix_benchmark.py --reports $REPORTS --n_runs $N_RUNS
          source ../../scripts/capture-hw-details.sh
          python build_report.py $REPORTS/matmul-performance-postop-addmatrix-bfloat16.csv $REPORTS/gemm-postop-addmatrix-bfloat16-triton-report.csv --benchmark gemm-postop-addmatrix --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
          python build_report.py $REPORTS/matmul-performance-postop-addmatrix-bfloat16.csv $REPORTS/gemm-postop-addmatrix-bfloat16-onednn-report.csv --benchmark gemm-postop-addmatrix --compiler onednn --param_cols "B,M,K,N" --tflops_col OneDNN-TFlops --hbm_col "OneDNN-GB/s" --tag $TAG

      - name: Run Triton GEMM + PostOp (add matrix) kernel benchmark int8
        if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_postop_addmatrix_benchmark_int8.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_postop_addmatrix_benchmark_int8.py') }}
        run: |
          cd benchmarks/triton_kernels_benchmark
          INT8_ONLY=1 python gemm_postop_addmatrix_benchmark.py --reports $REPORTS --n_runs $N_RUNS
          source ../../scripts/capture-hw-details.sh
          python build_report.py $REPORTS/matmul-performance-postop-addmatrix-int8.csv $REPORTS/gemm-postop-addmatrix-int8-triton-report.csv --benchmark gemm-postop-addmatrix-int8 --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
          python build_report.py $REPORTS/matmul-performance-postop-addmatrix-int8.csv $REPORTS/gemm-postop-addmatrix-int8-onednn-report.csv --benchmark gemm-postop-addmatrix-int8 --compiler onednn --param_cols "B,M,K,N" --tflops_col OneDNN-TFlops --hbm_col "OneDNN-GB/s" --tag $TAG

      - name: Run Triton FA fwd kernel benchmark
        if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flash_attention_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_benchmark.py') }}
        run: |
          cd benchmarks/triton_kernels_benchmark
          python flash_attention_benchmark.py --reports $REPORTS --n_runs $N_RUNS
          source ../../scripts/capture-hw-details.sh
          python build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
          python build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-xetla-report.csv --benchmark attn --compiler xetla --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG

      - name: Run Triton FA bwd kernel benchmark
        if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flash_attention_bwd_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_bwd_benchmark.py') }}
        run: |
          cd benchmarks/triton_kernels_benchmark
          FA_KERNEL_MODE="bwd" \
          python flash_attention_benchmark.py --reports $REPORTS --n_runs $N_RUNS
          mv $REPORTS/attn-performance.csv $REPORTS/attn-bwd-performance.csv
          source ../../scripts/capture-hw-details.sh
          python build_report.py $REPORTS/attn-bwd-performance.csv $REPORTS/attn-bwd-triton-report.csv --benchmark attn-bwd --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
          python build_report.py $REPORTS/attn-bwd-performance.csv $REPORTS/attn-bwd-xetla-report.csv --benchmark attn-bwd --compiler xetla --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG

      - name: Run Triton FA fwd kernel benchmark - with tensor descriptors
        if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flash_attention_tensor_desc_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_tensor_desc_benchmark.py') }}
        run: |
          cd benchmarks/triton_kernels_benchmark
          python flash_attention_tensor_desc_benchmark.py --reports $REPORTS --n_runs $N_RUNS
          mv $REPORTS/attn-performance.csv $REPORTS/attn-tensor-desc-performance.csv
          source ../../scripts/capture-hw-details.sh
          python build_report.py $REPORTS/attn-tensor-desc-performance.csv $REPORTS/attn-tensor-desc-triton-report.csv --benchmark attn-tensor-desc --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
          python build_report.py $REPORTS/attn-tensor-desc-performance.csv $REPORTS/attn-tensor-desc-xetla-report.csv --benchmark attn-tensor-desc --compiler xetla --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG

      - name: Run Prefix Sums kernel benchmark
        if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'prefix_sums.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'prefix_sums.py') }}
        run: |
          cd benchmarks/triton_kernels_benchmark
          python prefix_sums.py --reports $REPORTS --n_runs $N_RUNS
          source ../../scripts/capture-hw-details.sh
          python build_report.py $REPORTS/prefix-sums.csv $REPORTS/prefix_sums-triton-report.csv --benchmark prefix_sums --compiler triton --param_cols "N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG

      # NOTE(review): select key is 'micro_benchmarks.py' but skip key is
      # 'micro_benchmarks' (no .py) — kept as-is; confirm the asymmetry is intended.
      - name: Run micro benchmark
        if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'micro_benchmarks.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'micro_benchmarks') }}
        run: |
          cd benchmarks/micro_benchmarks
          python run_benchmarks.py --reports $REPORTS

      - name: Run Triton FlexAttention Causal Mask fwd kernel benchmark
        if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flex_attention_benchmark_causal_mask.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flex_attention_benchmark_causal_mask.py') }}
        run: |
          cd benchmarks/triton_kernels_benchmark
          python flex_attention_benchmark_causal_mask.py --reports $REPORTS --n_runs $N_RUNS
          source ../../scripts/capture-hw-details.sh
          python build_report.py $REPORTS/flexAttnCausal-performance.csv $REPORTS/flexAttnCausal-triton-report.csv --benchmark flexAttnCausal --compiler triton --param_cols "Z,H_q,H_kv,N_CTX_q,N_CTX_kv,D_HEAD_qk,D_HEAD_v" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG

      - name: Run Triton FlexAttention Custom Masks fwd kernel benchmark
        if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flex_attention_benchmark_custom_masks.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flex_attention_benchmark_custom_masks.py') }}
        run: |
          cd benchmarks/triton_kernels_benchmark
          python flex_attention_benchmark_custom_masks.py --reports $REPORTS --n_runs $N_RUNS
          source ../../scripts/capture-hw-details.sh
          python build_report.py $REPORTS/flexAttnMasks-performance.csv $REPORTS/flexAttnMasks-triton-report.csv --benchmark flexAttnMasks --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,MASK" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG --mask
          python build_report.py $REPORTS/flexAttnMasks-performance.csv $REPORTS/flexAttnMasks-onednn-report.csv --benchmark flexAttnMasks --compiler onednn --param_cols "Z,H,N_CTX,D_HEAD,MASK" --tflops_col OneDNN-TFlops --hbm_col "OneDNN-GB/s" --tag $TAG --mask

      - name: Upload benchmark reports
        if: ${{ steps.install.outcome == 'success' && !cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-reports
          path: reports