Skip to content

llama : build windows releases with dl backends #13220

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 26 additions & 45 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -768,7 +768,7 @@ jobs:
uses: hendrikmuhs/[email protected]
with:
key: windows-msys2
variant: sccache
variant: ccache
evict-old-files: 1d

- name: Setup ${{ matrix.sys }}
Expand Down Expand Up @@ -811,26 +811,18 @@ jobs:
strategy:
matrix:
include:
- build: 'noavx-x64'
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
- build: 'avx2-x64'
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON'
- build: 'avx-x64'
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX2=OFF'
- build: 'avx512-x64'
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX512=ON'
- build: 'cpu-x64'
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF'
- build: 'openblas-x64'
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
- build: 'kompute-x64'
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON'
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
- build: 'vulkan-x64'
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_VULKAN=ON'
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_VULKAN=ON'
- build: 'llvm-arm64'
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
- build: 'msvc-arm64'
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
- build: 'llvm-arm64-opencl-adreno'
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'
# - build: 'kompute-x64'
# defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON'

steps:
- name: Clone
Expand All @@ -843,7 +835,7 @@ jobs:
uses: hendrikmuhs/[email protected]
with:
key: windows-latest-cmake-${{ matrix.build }}
variant: sccache
variant: ccache
evict-old-files: 1d

- name: Clone Kompute submodule
Expand Down Expand Up @@ -919,39 +911,26 @@ jobs:
cp $env:RUNNER_TEMP/openblas/bin/libopenblas.dll ./build/bin/Release/openblas.dll
cp $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt ./build/bin/Release/OpenBLAS-${env:OPENBLAS_VERSION}.txt

- name: Check AVX512F support
id: check_avx512f
if: ${{ matrix.build == 'avx512-x64' }}
continue-on-error: true
run: |
cd build
$vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
$msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
$cl = $(join-path $msvc 'bin\Hostx64\x64\cl.exe')
echo 'int main(void){unsigned int a[4];__cpuid(a,7);return !(a[1]&65536);}' >> avx512f.c
& $cl /O2 /GS- /kernel avx512f.c /link /nodefaultlib /entry:main
.\avx512f.exe && echo "AVX512F: YES" && ( echo HAS_AVX512F=1 >> $env:GITHUB_ENV ) || echo "AVX512F: NO"

- name: Test
id: cmake_test
# not all machines have native AVX-512
if: ${{ matrix.build != 'msvc-arm64' && matrix.build != 'llvm-arm64' && matrix.build != 'llvm-arm64-opencl-adreno' && matrix.build != 'kompute-x64' && matrix.build != 'vulkan-x64' && (matrix.build != 'avx512-x64' || env.HAS_AVX512F == '1') }}
if: ${{ matrix.build != 'llvm-arm64' && matrix.build != 'llvm-arm64-opencl-adreno' }}
run: |
cd build
ctest -L main -C Release --verbose --timeout 900

- name: Test (Intel SDE)
id: cmake_test_sde
if: ${{ matrix.build == 'avx512-x64' && env.HAS_AVX512F == '0' }} # use Intel SDE for AVX-512 emulation
run: |
curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/813591/sde-external-${env:SDE_VERSION}-win.tar.xz"
# for some weird reason windows tar doesn't like sde tar.xz
7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz
7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
$sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
cd build
$env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1
& $sde -future -- ctest -L main -C Release --verbose --timeout 900
# TODO: disabled for now, consider adding tests for all CPU variants instead
# - name: Test (Intel SDE)
# id: cmake_test_sde
# if: ${{ matrix.build == 'avx512-x64' && env.HAS_AVX512F == '0' }} # use Intel SDE for AVX-512 emulation
# run: |
# curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/813591/sde-external-${env:SDE_VERSION}-win.tar.xz"
# # for some weird reason windows tar doesn't like sde tar.xz
# 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz
# 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
# $sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
# cd build
# $env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1
# & $sde -future -- ctest -L main -C Release --verbose --timeout 900

- name: Determine tag name
id: tag
Expand Down Expand Up @@ -1036,7 +1015,7 @@ jobs:
uses: hendrikmuhs/[email protected]
with:
key: ${{ github.job }}-${{ matrix.cuda }}-${{ matrix.build }}
variant: sccache
variant: ccache
evict-old-files: 1d

- name: Install Cuda Toolkit 11.7
Expand Down Expand Up @@ -1114,6 +1093,8 @@ jobs:
cmake -S . -B build -G "Ninja Multi-Config" ^
-DLLAMA_BUILD_SERVER=ON ^
-DGGML_NATIVE=OFF ^
-DGGML_BACKEND_DL=ON ^
-DGGML_CPU_ALL_VARIANTS=ON ^
-DGGML_CUDA=ON ^
-DGGML_RPC=ON ^
-DCURL_LIBRARY="%CURL_PATH%/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="%CURL_PATH%/include"
Expand Down Expand Up @@ -1188,7 +1169,7 @@ jobs:
uses: hendrikmuhs/[email protected]
with:
key: windows-latest-cmake-sycl
variant: sccache
variant: ccache
evict-old-files: 1d

- name: Install
Expand Down
6 changes: 0 additions & 6 deletions cmake/arm64-windows-msvc.cmake

This file was deleted.

6 changes: 0 additions & 6 deletions cmake/x64-windows-llvm.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,3 @@ set( CMAKE_SYSTEM_PROCESSOR x86_64 )

set( CMAKE_C_COMPILER clang )
set( CMAKE_CXX_COMPILER clang++ )

set( arch_c_flags "-march=native" )

set( CMAKE_C_FLAGS_INIT "${arch_c_flags}" )
set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags}" )

5 changes: 4 additions & 1 deletion tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,10 @@ if (NOT WIN32)
target_include_directories(test-json-schema-to-grammar PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../examples/server)
endif()

llama_build(test-quantize-stats.cpp)
if (NOT GGML_BACKEND_DL)
llama_build(test-quantize-stats.cpp)
endif()

llama_build(test-gbnf-validator.cpp)

# build test-tokenizer-1-bpe target once and add many tests
Expand Down
Loading