Skip to content

Commit f91fb97

Browse files
committed
Merge branch 'master' into xsn/server_mtmd
2 parents e82fea8 + 3bf785f commit f91fb97

File tree

300 files changed

+7029
-3061
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

300 files changed

+7029
-3061
lines changed

.devops/cpu.Dockerfile

+2-2
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@ WORKDIR /app
1414
COPY . .
1515

1616
RUN if [ "$TARGETARCH" = "amd64" ]; then \
17-
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
17+
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
1818
elif [ "$TARGETARCH" = "arm64" ]; then \
19-
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \
19+
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \
2020
else \
2121
echo "Unsupported architecture"; \
2222
exit 1; \

.devops/cuda.Dockerfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ COPY . .
2121
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
2222
export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
2323
fi && \
24-
cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
24+
cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
2525
cmake --build build --config Release -j$(nproc)
2626

2727
RUN mkdir -p /app/lib && \

.devops/intel.Dockerfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
1717
&& export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
1818
fi && \
1919
echo "Building with dynamic libs" && \
20-
cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${OPT_SYCL_F16} && \
20+
cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${OPT_SYCL_F16} && \
2121
cmake --build build --config Release -j$(nproc)
2222

2323
RUN mkdir -p /app/lib && \

.devops/llama-cli-cann.Dockerfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH
2222

2323
RUN echo "Building with static libs" && \
2424
source /usr/local/Ascend/ascend-toolkit/set_env.sh --force && \
25-
cmake -B build -DGGML_NATIVE=OFF -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF && \
25+
cmake -B build -DGGML_NATIVE=OFF -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_TESTS=OFF && \
2626
cmake --build build --config Release --target llama-cli
2727

2828
# TODO: use image with NNRT

.devops/musa.Dockerfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ COPY . .
3535
RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
3636
export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
3737
fi && \
38-
cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
38+
cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
3939
cmake --build build --config Release -j$(nproc)
4040

4141
RUN mkdir -p /app/lib && \

.devops/rocm.Dockerfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ WORKDIR /app
4040
COPY . .
4141

4242
RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
43-
cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON \
43+
cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DCMAKE_BUILD_TYPE=Release -DLLAMA_BUILD_TESTS=OFF \
4444
&& cmake --build build --config Release -j$(nproc)
4545

4646
RUN mkdir -p /app/lib \

.devops/vulkan.Dockerfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ WORKDIR /app
1616

1717
COPY . .
1818

19-
RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_CURL=1 -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON && \
19+
RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON && \
2020
cmake --build build --config Release -j$(nproc)
2121

2222
RUN mkdir -p /app/lib && \

.editorconfig

+4-4
Original file line numberDiff line numberDiff line change
@@ -21,23 +21,23 @@ indent_style = tab
2121
[prompts/*.txt]
2222
insert_final_newline = unset
2323

24-
[examples/server/public/*]
24+
[tools/server/public/*]
2525
indent_size = 2
2626

27-
[examples/server/public/deps_*]
27+
[tools/server/public/deps_*]
2828
trim_trailing_whitespace = unset
2929
indent_style = unset
3030
indent_size = unset
3131

32-
[examples/server/deps_*]
32+
[tools/server/deps_*]
3333
trim_trailing_whitespace = unset
3434
indent_style = unset
3535
indent_size = unset
3636

3737
[examples/llama.swiftui/llama.swiftui.xcodeproj/*]
3838
indent_style = tab
3939

40-
[examples/cvector-generator/*.txt]
40+
[tools/cvector-generator/*.txt]
4141
trim_trailing_whitespace = unset
4242
insert_final_newline = unset
4343

.flake8

+2-1
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@
22
max-line-length = 125
33
ignore = E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503
44
exclude =
5-
# Do not traverse examples
5+
# Do not traverse examples and tools
66
examples,
7+
tools,
78
# Do not include package initializers
89
__init__.py,
910
# No need to traverse our git directory

.github/labeler.yml

+4-2
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,9 @@ build:
4545
- CMakePresets.json
4646
examples:
4747
- changed-files:
48-
- any-glob-to-any-file: examples/**
48+
- any-glob-to-any-file:
49+
- examples/**
50+
- tools/**
4951
devops:
5052
- changed-files:
5153
- any-glob-to-any-file:
@@ -70,7 +72,7 @@ android:
7072
server:
7173
- changed-files:
7274
- any-glob-to-any-file:
73-
- examples/server/**
75+
- tools/server/**
7476
ggml:
7577
- changed-files:
7678
- any-glob-to-any-file:

.github/workflows/bench.yml.disabled

+15-15
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,10 @@ on:
2727
push:
2828
branches:
2929
- master
30-
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
30+
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'tools/server/*.h*', 'tools/server/*.cpp']
3131
pull_request_target:
3232
types: [opened, synchronize, reopened]
33-
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
33+
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'tools/server/*.h*', 'tools/server/*.cpp']
3434
schedule:
3535
- cron: '04 2 * * *'
3636

@@ -69,7 +69,7 @@ jobs:
6969
- name: Install python env
7070
id: pipenv
7171
run: |
72-
cd examples/server/bench
72+
cd tools/server/bench
7373
python3 -m venv venv
7474
source venv/bin/activate
7575
pip install -r requirements.txt
@@ -79,7 +79,7 @@ jobs:
7979
run: |
8080
wget --quiet https://github.com/prometheus/prometheus/releases/download/v2.51.0/prometheus-2.51.0.linux-amd64.tar.gz
8181
tar xzf prometheus*.tar.gz --strip-components=1
82-
./prometheus --config.file=examples/server/bench/prometheus.yml &
82+
./prometheus --config.file=tools/server/bench/prometheus.yml &
8383
while ! nc -z localhost 9090; do
8484
sleep 0.1
8585
done
@@ -92,7 +92,7 @@ jobs:
9292
- name: Install k6 and xk6-sse
9393
id: k6_installation
9494
run: |
95-
cd examples/server/bench
95+
cd tools/server/bench
9696
go install go.k6.io/xk6/cmd/xk6@latest
9797
xk6 build master \
9898
--with github.com/phymbert/xk6-sse
@@ -116,7 +116,7 @@ jobs:
116116
- name: Download the dataset
117117
id: download_dataset
118118
run: |
119-
cd examples/server/bench
119+
cd tools/server/bench
120120
wget --quiet https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
121121

122122
- name: Server bench
@@ -126,7 +126,7 @@ jobs:
126126
run: |
127127
set -eux
128128

129-
cd examples/server/bench
129+
cd tools/server/bench
130130
source venv/bin/activate
131131
python bench.py \
132132
--runner-label ${{ env.RUNNER_LABEL }} \
@@ -157,9 +157,9 @@ jobs:
157157
name: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
158158
compression-level: 9
159159
path: |
160-
examples/server/bench/*.jpg
161-
examples/server/bench/*.json
162-
examples/server/bench/*.log
160+
tools/server/bench/*.jpg
161+
tools/server/bench/*.json
162+
tools/server/bench/*.log
163163

164164
- name: Commit status
165165
uses: Sibz/github-status-action@v1
@@ -178,17 +178,17 @@ jobs:
178178
with:
179179
client_id: ${{secrets.IMGUR_CLIENT_ID}}
180180
path: |
181-
examples/server/bench/prompt_tokens_seconds.jpg
182-
examples/server/bench/predicted_tokens_seconds.jpg
183-
examples/server/bench/kv_cache_usage_ratio.jpg
184-
examples/server/bench/requests_processing.jpg
181+
tools/server/bench/prompt_tokens_seconds.jpg
182+
tools/server/bench/predicted_tokens_seconds.jpg
183+
tools/server/bench/kv_cache_usage_ratio.jpg
184+
tools/server/bench/requests_processing.jpg
185185

186186
- name: Extract mermaid
187187
id: set_mermaid
188188
run: |
189189
set -eux
190190

191-
cd examples/server/bench
191+
cd tools/server/bench
192192
PROMPT_TOKENS_SECONDS=$(cat prompt_tokens_seconds.mermaid)
193193
echo "PROMPT_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
194194
echo "$PROMPT_TOKENS_SECONDS" >> $GITHUB_ENV

.github/workflows/build-linux-cross.yml

+42-24
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,25 @@ on:
44
workflow_call:
55

66
jobs:
7-
ubuntu-latest-riscv64-cpu-cross:
8-
runs-on: ubuntu-latest
7+
ubuntu-24-riscv64-cpu-cross:
8+
runs-on: ubuntu-24.04
99

1010
steps:
1111
- uses: actions/checkout@v4
1212
- name: Setup Riscv
1313
run: |
1414
sudo dpkg --add-architecture riscv64
15-
sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \
16-
/etc/apt/sources.list /etc/apt/apt-mirrors.txt
17-
sudo apt-get clean
18-
sudo apt-get update
15+
16+
# Add arch-specific repositories for non-amd64 architectures
17+
cat << EOF | sudo tee /etc/apt/sources.list.d/riscv64-ports.list
18+
deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
19+
deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
20+
deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
21+
deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
22+
EOF
23+
24+
sudo apt-get update || true ;# Prevent failure due to missing URLs.
25+
1926
sudo apt-get install -y --no-install-recommends \
2027
build-essential \
2128
gcc-14-riscv64-linux-gnu \
@@ -27,6 +34,7 @@ jobs:
2734
cmake -B build -DCMAKE_BUILD_TYPE=Release \
2835
-DGGML_OPENMP=OFF \
2936
-DLLAMA_BUILD_EXAMPLES=ON \
37+
-DLLAMA_BUILD_TOOLS=ON \
3038
-DLLAMA_BUILD_TESTS=OFF \
3139
-DCMAKE_SYSTEM_NAME=Linux \
3240
-DCMAKE_SYSTEM_PROCESSOR=riscv64 \
@@ -40,21 +48,25 @@ jobs:
4048
4149
cmake --build build --config Release -j $(nproc)
4250
43-
ubuntu-latest-riscv64-vulkan-cross:
44-
runs-on: ubuntu-latest
51+
ubuntu-24-riscv64-vulkan-cross:
52+
runs-on: ubuntu-24.04
4553

4654
steps:
4755
- uses: actions/checkout@v4
48-
with:
49-
fetch-depth: 0
50-
5156
- name: Setup Riscv
5257
run: |
5358
sudo dpkg --add-architecture riscv64
54-
sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \
55-
/etc/apt/sources.list /etc/apt/apt-mirrors.txt
56-
sudo apt-get clean
57-
sudo apt-get update
59+
60+
# Add arch-specific repositories for non-amd64 architectures
61+
cat << EOF | sudo tee /etc/apt/sources.list.d/riscv64-ports.list
62+
deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
63+
deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
64+
deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
65+
deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
66+
EOF
67+
68+
sudo apt-get update || true ;# Prevent failure due to missing URLs.
69+
5870
sudo apt-get install -y --no-install-recommends \
5971
build-essential \
6072
glslc \
@@ -69,6 +81,7 @@ jobs:
6981
-DGGML_VULKAN=ON \
7082
-DGGML_OPENMP=OFF \
7183
-DLLAMA_BUILD_EXAMPLES=ON \
84+
-DLLAMA_BUILD_TOOLS=ON \
7285
-DLLAMA_BUILD_TESTS=OFF \
7386
-DCMAKE_SYSTEM_NAME=Linux \
7487
-DCMAKE_SYSTEM_PROCESSOR=riscv64 \
@@ -82,21 +95,25 @@ jobs:
8295
8396
cmake --build build --config Release -j $(nproc)
8497
85-
ubuntu-latest-arm64-vulkan-cross:
86-
runs-on: ubuntu-latest
98+
ubuntu-24-arm64-vulkan-cross:
99+
runs-on: ubuntu-24.04
87100

88101
steps:
89102
- uses: actions/checkout@v4
90-
with:
91-
fetch-depth: 0
92-
93103
- name: Setup Arm64
94104
run: |
95105
sudo dpkg --add-architecture arm64
96-
sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \
97-
/etc/apt/sources.list /etc/apt/apt-mirrors.txt
98-
sudo apt-get clean
99-
sudo apt-get update
106+
107+
# Add arch-specific repositories for non-amd64 architectures
108+
cat << EOF | sudo tee /etc/apt/sources.list.d/arm64-ports.list
109+
deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
110+
deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
111+
deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
112+
deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
113+
EOF
114+
115+
sudo apt-get update || true ;# Prevent failure due to missing URLs.
116+
100117
sudo apt-get install -y --no-install-recommends \
101118
build-essential \
102119
glslc \
@@ -110,6 +127,7 @@ jobs:
110127
-DGGML_VULKAN=ON \
111128
-DGGML_OPENMP=OFF \
112129
-DLLAMA_BUILD_EXAMPLES=ON \
130+
-DLLAMA_BUILD_TOOLS=ON \
113131
-DLLAMA_BUILD_TESTS=OFF \
114132
-DCMAKE_SYSTEM_NAME=Linux \
115133
-DCMAKE_SYSTEM_PROCESSOR=aarch64 \

.github/workflows/build.yml

+7-3
Original file line numberDiff line numberDiff line change
@@ -601,9 +601,8 @@ jobs:
601601
-DGGML_SYCL_F16=ON
602602
cmake --build build --config Release -j $(nproc)
603603
604-
# Disabled for now due to sporadic issue syncing.
605-
# build-linux-cross:
606-
# uses: ./.github/workflows/build-linux-cross.yml
604+
build-linux-cross:
605+
uses: ./.github/workflows/build-linux-cross.yml
607606

608607
macOS-latest-cmake-ios:
609608
runs-on: macos-latest
@@ -634,6 +633,7 @@ jobs:
634633
-DGGML_METAL_EMBED_LIBRARY=ON \
635634
-DLLAMA_BUILD_COMMON=OFF \
636635
-DLLAMA_BUILD_EXAMPLES=OFF \
636+
-DLLAMA_BUILD_TOOLS=OFF \
637637
-DLLAMA_BUILD_TESTS=OFF \
638638
-DLLAMA_BUILD_SERVER=OFF \
639639
-DCMAKE_SYSTEM_NAME=iOS \
@@ -670,6 +670,7 @@ jobs:
670670
-DGGML_METAL_EMBED_LIBRARY=ON \
671671
-DLLAMA_BUILD_COMMON=OFF \
672672
-DLLAMA_BUILD_EXAMPLES=OFF \
673+
-DLLAMA_BUILD_TOOLS=OFF \
673674
-DLLAMA_BUILD_TESTS=OFF \
674675
-DLLAMA_BUILD_SERVER=OFF \
675676
-DCMAKE_SYSTEM_NAME=tvOS \
@@ -700,6 +701,7 @@ jobs:
700701
-DGGML_METAL_EMBED_LIBRARY=ON \
701702
-DLLAMA_BUILD_COMMON=OFF \
702703
-DLLAMA_BUILD_EXAMPLES=OFF \
704+
-DLLAMA_BUILD_TOOLS=OFF \
703705
-DLLAMA_BUILD_TESTS=OFF \
704706
-DLLAMA_BUILD_SERVER=OFF \
705707
-DCMAKE_SYSTEM_NAME=visionOS \
@@ -740,6 +742,7 @@ jobs:
740742
-DGGML_METAL_EMBED_LIBRARY=ON \
741743
-DLLAMA_CURL=OFF \
742744
-DLLAMA_BUILD_EXAMPLES=OFF \
745+
-DLLAMA_BUILD_TOOLS=OFF \
743746
-DLLAMA_BUILD_TESTS=OFF \
744747
-DLLAMA_BUILD_SERVER=OFF \
745748
-DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"
@@ -1418,6 +1421,7 @@ jobs:
14181421
-DGGML_METAL_EMBED_LIBRARY=ON \
14191422
-DLLAMA_CURL=OFF \
14201423
-DLLAMA_BUILD_EXAMPLES=OFF \
1424+
-DLLAMA_BUILD_TOOLS=OFF \
14211425
-DLLAMA_BUILD_TESTS=OFF \
14221426
-DLLAMA_BUILD_SERVER=OFF \
14231427
-DCMAKE_SYSTEM_NAME=iOS \

0 commit comments

Comments
 (0)