
Commit e58d3cc

Merge branch 'ggerganov:master' into fix-vulkan-shader-warnings
2 parents 7b7f749 + 01aec4a commit e58d3cc


164 files changed: +19935 / -10545 lines changed


.devops/full-cuda.Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,7 @@ ARG CUDA_VERSION=11.7.1
 # Target the CUDA build image
 ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
 
-FROM ${BASE_CUDA_DEV_CONTAINER} as build
+FROM ${BASE_CUDA_DEV_CONTAINER} AS build
 
 # Unless otherwise specified, we make a fat build.
 ARG CUDA_DOCKER_ARCH=all
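
Note: the recurring "as build" to "AS build" edit in this and the following Dockerfiles only uppercases the multi-stage keyword so that its casing matches FROM; recent Docker/BuildKit versions warn about the mismatched lowercase form, and the build result is otherwise unchanged. A minimal illustrative sketch of the convention (not taken from this commit):

FROM ubuntu:22.04 AS build
# produce an artifact in the named build stage
RUN echo "built" > /artifact

FROM ubuntu:22.04 AS runtime
# copy only the artifact into the slimmer runtime stage, referring to the stage by name
COPY --from=build /artifact /artifact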

.devops/full-rocm.Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,7 @@ ARG ROCM_VERSION=5.6
 # Target the CUDA build image
 ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
 
-FROM ${BASE_ROCM_DEV_CONTAINER} as build
+FROM ${BASE_ROCM_DEV_CONTAINER} AS build
 
 # Unless otherwise specified, we make a fat build.
 # List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878

.devops/full.Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 ARG UBUNTU_VERSION=22.04
 
-FROM ubuntu:$UBUNTU_VERSION as build
+FROM ubuntu:$UBUNTU_VERSION AS build
 
 RUN apt-get update && \
     apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1

.devops/llama-cli-cuda.Dockerfile

Lines changed: 2 additions & 2 deletions
@@ -6,7 +6,7 @@ ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VER
 # Target the CUDA runtime image
 ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
 
-FROM ${BASE_CUDA_DEV_CONTAINER} as build
+FROM ${BASE_CUDA_DEV_CONTAINER} AS build
 
 # Unless otherwise specified, we make a fat build.
 ARG CUDA_DOCKER_ARCH=all
@@ -25,7 +25,7 @@ ENV GGML_CUDA=1
 
 RUN make -j$(nproc) llama-cli
 
-FROM ${BASE_CUDA_RUN_CONTAINER} as runtime
+FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
 
 RUN apt-get update && \
     apt-get install -y libgomp1

.devops/llama-cli-intel.Dockerfile

Lines changed: 5 additions & 3 deletions
@@ -1,6 +1,6 @@
 ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04
 
-FROM intel/oneapi-basekit:$ONEAPI_VERSION as build
+FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build
 
 ARG GGML_SYCL_F16=OFF
 RUN apt-get update && \
@@ -14,10 +14,12 @@ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
         echo "GGML_SYCL_F16 is set" && \
         export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
     fi && \
-    cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
+    echo "Building with static libs" && \
+    cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx \
+    ${OPT_SYCL_F16} -DBUILD_SHARED_LIBS=OFF && \
     cmake --build build --config Release --target llama-cli
 
-FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
+FROM intel/oneapi-basekit:$ONEAPI_VERSION AS runtime
 
 COPY --from=build /app/build/bin/llama-cli /llama-cli
 
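
Note: the extra -DBUILD_SHARED_LIBS=OFF flag (and the "Building with static libs" echo) asks CMake to build the llama.cpp/ggml libraries as static archives, so the single llama-cli binary copied into the runtime stage does not also need shared libllama/libggml files; it still relies on the oneAPI runtime provided by the base image. A rough sketch of the same configure step as a standalone build stage (WORKDIR, COPY context, and the pinned tag are assumptions for illustration, not taken from this commit):

FROM intel/oneapi-basekit:2024.1.1-devel-ubuntu22.04 AS build
WORKDIR /app
COPY . .
# static SYCL build: the project libraries are linked into the binary itself
RUN cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DBUILD_SHARED_LIBS=OFF && \
    cmake --build build --config Release --target llama-cli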

.devops/llama-cli-rocm.Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,7 @@ ARG ROCM_VERSION=5.6
 # Target the CUDA build image
 ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
 
-FROM ${BASE_ROCM_DEV_CONTAINER} as build
+FROM ${BASE_ROCM_DEV_CONTAINER} AS build
 
 # Unless otherwise specified, we make a fat build.
 # List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878

.devops/llama-cli-vulkan.Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 ARG UBUNTU_VERSION=jammy
 
-FROM ubuntu:$UBUNTU_VERSION as build
+FROM ubuntu:$UBUNTU_VERSION AS build
 
 # Install build tools
 RUN apt update && apt install -y git build-essential cmake wget libgomp1

.devops/llama-cli.Dockerfile

Lines changed: 2 additions & 2 deletions
@@ -1,6 +1,6 @@
 ARG UBUNTU_VERSION=22.04
 
-FROM ubuntu:$UBUNTU_VERSION as build
+FROM ubuntu:$UBUNTU_VERSION AS build
 
 RUN apt-get update && \
     apt-get install -y build-essential git
@@ -11,7 +11,7 @@ COPY . .
 
 RUN make -j$(nproc) llama-cli
 
-FROM ubuntu:$UBUNTU_VERSION as runtime
+FROM ubuntu:$UBUNTU_VERSION AS runtime
 
 RUN apt-get update && \
     apt-get install -y libgomp1

.devops/llama-server-cuda.Dockerfile

Lines changed: 2 additions & 2 deletions
@@ -6,7 +6,7 @@ ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VER
 # Target the CUDA runtime image
 ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
 
-FROM ${BASE_CUDA_DEV_CONTAINER} as build
+FROM ${BASE_CUDA_DEV_CONTAINER} AS build
 
 # Unless otherwise specified, we make a fat build.
 ARG CUDA_DOCKER_ARCH=all
@@ -27,7 +27,7 @@ ENV LLAMA_CURL=1
 
 RUN make -j$(nproc) llama-server
 
-FROM ${BASE_CUDA_RUN_CONTAINER} as runtime
+FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
 
 RUN apt-get update && \
     apt-get install -y libcurl4-openssl-dev libgomp1 curl

.devops/llama-server-intel.Dockerfile

Lines changed: 3 additions & 2 deletions
@@ -1,6 +1,6 @@
 ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04
 
-FROM intel/oneapi-basekit:$ONEAPI_VERSION as build
+FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build
 
 ARG GGML_SYCL_F16=OFF
 RUN apt-get update && \
@@ -14,10 +14,11 @@ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
         echo "GGML_SYCL_F16 is set" && \
         export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
     fi && \
+    echo "Building with dynamic libs" && \
     cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
     cmake --build build --config Release --target llama-server
 
-FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
+FROM intel/oneapi-basekit:$ONEAPI_VERSION AS runtime
 
 RUN apt-get update && \
     apt-get install -y libcurl4-openssl-dev curl

.devops/llama-server-rocm.Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,7 @@ ARG ROCM_VERSION=5.6
 # Target the CUDA build image
 ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
 
-FROM ${BASE_ROCM_DEV_CONTAINER} as build
+FROM ${BASE_ROCM_DEV_CONTAINER} AS build
 
 # Unless otherwise specified, we make a fat build.
 # List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878

.devops/llama-server-vulkan.Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 ARG UBUNTU_VERSION=jammy
 
-FROM ubuntu:$UBUNTU_VERSION as build
+FROM ubuntu:$UBUNTU_VERSION AS build
 
 # Install build tools
 RUN apt update && apt install -y git build-essential cmake wget

.devops/llama-server.Dockerfile

Lines changed: 2 additions & 2 deletions
@@ -1,6 +1,6 @@
 ARG UBUNTU_VERSION=22.04
 
-FROM ubuntu:$UBUNTU_VERSION as build
+FROM ubuntu:$UBUNTU_VERSION AS build
 
 RUN apt-get update && \
     apt-get install -y build-essential git libcurl4-openssl-dev curl
@@ -13,7 +13,7 @@ ENV LLAMA_CURL=1
 
 RUN make -j$(nproc) llama-server
 
-FROM ubuntu:$UBUNTU_VERSION as runtime
+FROM ubuntu:$UBUNTU_VERSION AS runtime
 
 RUN apt-get update && \
     apt-get install -y libcurl4-openssl-dev libgomp1

.devops/nix/apps.nix

Lines changed: 0 additions & 1 deletion
@@ -10,7 +10,6 @@
       "llama-embedding"
       "llama-server"
       "llama-quantize"
-      "llama-train-text-from-scratch"
     ];
     mkApp = name: {
       type = "app";

.devops/tools.sh

Lines changed: 0 additions & 4 deletions
@@ -13,8 +13,6 @@ elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
     ./llama-quantize "$@"
 elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
     ./llama-cli "$@"
-elif [[ "$arg1" == '--finetune' || "$arg1" == '-f' ]]; then
-    ./llama-finetune "$@"
 elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
     echo "Converting PTH to GGML..."
     for i in `ls $1/$2/ggml-model-f16.bin*`; do
@@ -36,8 +34,6 @@ else
     echo " ex: --outtype f16 \"/models/7B/\" "
     echo " --quantize (-q): Optimize with quantization process ggml"
     echo " ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2"
-    echo " --finetune (-f): Run finetune command to create a lora finetune of the model"
-    echo " See documentation for finetune for command-line parameters"
     echo " --all-in-one (-a): Execute --convert & --quantize"
     echo " ex: \"/models/\" 7B"
     echo " --server (-s): Run a model on the server"

.github/workflows/build.yml

Lines changed: 1 addition & 1 deletion
@@ -860,7 +860,7 @@ jobs:
           mkdir build
           cd build
           cmake .. -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=ON
-          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
+          cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1))
 
       - name: Determine tag name
         id: tag

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -106,6 +106,7 @@ llama_option_depr(WARNING LLAMA_NATIVE GGML_NATIVE)
 llama_option_depr(WARNING LLAMA_RPC GGML_RPC)
 llama_option_depr(WARNING LLAMA_SYCL GGML_SYCL)
 llama_option_depr(WARNING LLAMA_SYCL_F16 GGML_SYCL_F16)
+llama_option_depr(WARNING LLAMA_CANN GGML_CANN)
 
 #
 # build the library

CONTRIBUTING.md

Lines changed: 9 additions & 5 deletions
@@ -1,13 +1,17 @@
-# Pull requests
+# Pull requests (for contributors)
 
-- Always squash-merge the PR before merging
-- Use the following format for your final commit: `<module> : <commit title> (#<issue_number>)`. For example: `utils : fix typo in utils.py (#1234)`
 - Test your changes:
   - Using the commands in the [`tests`](tests) folder. For instance, running the `./tests/test-backend-ops` command tests different backend implementations of the GGML library
   - Execute [the full CI locally on your machine](ci/README.md) before publishing
-- If the pull request contains only documentation changes (e.g., updating READMEs, adding new wiki pages), please add `[no ci]` to the commit title. This will skip unnecessary CI checks and help reduce build times
 - Please rate the complexity of your PR (i.e. `Review Complexity : Low`, `Review Complexity : Medium`, `Review Complexity : High`). This makes it easier for maintainers to triage the PRs.
-- The PR template has a series of review complexity checkboxes `[ ]` that [you can mark as](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/about-task-lists) `[X]` for your conveience
+- The PR template has a series of review complexity checkboxes `[ ]` that [you can mark as](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/about-task-lists) `[X]` for your convenience
+- If your PR becomes stale, don't hesitate to ping the maintainers in the comments
+
+# Pull requests (for collaborators)
+
+- Squash-merge PRs
+- Use the following format for the squashed commit title: `<module> : <commit title> (#<issue_number>)`. For example: `utils : fix typo in utils.py (#1234)`
+- Optionally, pick a `<module>` from here: https://github.com/ggerganov/llama.cpp/wiki/Modules
 
 # Coding guidelines
 

Makefile

Lines changed: 44 additions & 28 deletions
@@ -11,7 +11,6 @@ BUILD_TARGETS = \
	llama-embedding \
	llama-eval-callback \
	llama-export-lora \
-	llama-finetune \
	llama-gbnf-validator \
	llama-gguf \
	llama-gguf-hash \
@@ -37,7 +36,6 @@ BUILD_TARGETS = \
	llama-simple \
	llama-speculative \
	llama-tokenize \
-	llama-train-text-from-scratch \
	llama-vdot \
	llama-cvector-generator \
	tests/test-c.o
@@ -64,13 +62,13 @@ TEST_TARGETS = \
	tests/test-tokenizer-1-spm
 
 # Legacy build targets that were renamed in #7809, but should still be removed when the project is cleaned
-LEGACY_TARGETS_CLEAN = main quantize quantize-stats perplexity imatrix embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
+LEGACY_TARGETS_CLEAN = main quantize quantize-stats perplexity imatrix embedding vdot q8dot convert-llama2c-to-ggml \
	simple batched batched-bench save-load-state server gguf gguf-split eval-callback llama-bench libllava.a llava-cli baby-llama \
-	retrieval speculative infill tokenize benchmark-matmult parallel finetune export-lora lookahead lookup passkey gritlm
+	retrieval speculative infill tokenize benchmark-matmult parallel export-lora lookahead lookup passkey gritlm
 
 # Legacy build targets that were renamed in #7809, but we want to build binaries that for them that output a deprecation warning if people try to use them.
 # We don't want to clutter things too much, so we only build replacements for the most commonly used binaries.
-LEGACY_TARGETS_BUILD = main quantize perplexity embedding server finetune
+LEGACY_TARGETS_BUILD = main quantize perplexity embedding server
 
 # Deprecation aliases
 ifdef LLAMA_CUBLAS
@@ -795,6 +793,14 @@ ifdef GGML_CUDA_FORCE_DMMV
	HIPFLAGS += -DGGML_CUDA_FORCE_DMMV
 endif # GGML_CUDA_FORCE_DMMV
 
+ifdef GGML_CUDA_FORCE_MMQ
+	HIPFLAGS += -DGGML_CUDA_FORCE_MMQ
+endif # GGML_CUDA_FORCE_MMQ
+
+ifdef GGML_CUDA_FORCE_CUBLAS
+	HIPFLAGS += -DGGML_CUDA_FORCE_CUBLAS
+endif # GGML_CUDA_FORCE_CUBLAS
+
 ifdef GGML_CUDA_NO_PEER_COPY
	HIPFLAGS += -DGGML_CUDA_NO_PEER_COPY
 endif # GGML_CUDA_NO_PEER_COPY
@@ -868,6 +874,9 @@ OBJ_GGML += \
 
 OBJ_LLAMA = \
	src/llama.o \
+	src/llama-vocab.o \
+	src/llama-grammar.o \
+	src/llama-sampling.o \
	src/unicode.o \
	src/unicode-data.o
 
@@ -1047,6 +1056,10 @@ src/unicode-data.o: \
 
 src/llama.o: \
	src/llama.cpp \
+	src/llama-impl.h \
+	src/llama-vocab.h \
+	src/llama-grammar.h \
+	src/llama-sampling.h \
	src/unicode.h \
	include/llama.h \
	ggml/include/ggml-cuda.h \
@@ -1056,6 +1069,29 @@ src/llama.o: \
	ggml/include/ggml-backend.h
	$(CXX) $(CXXFLAGS) -c $< -o $@
 
+src/llama-vocab.o: \
+	src/llama-vocab.cpp \
+	src/llama-vocab.h \
+	src/llama-impl.h \
+	include/llama.h
+	$(CXX) $(CXXFLAGS) -c $< -o $@
+
+src/llama-grammar.o: \
+	src/llama-grammar.cpp \
+	src/llama-grammar.h \
+	src/llama-impl.h \
+	src/llama-vocab.h \
+	src/llama-sampling.h \
+	include/llama.h
+	$(CXX) $(CXXFLAGS) -c $< -o $@
+
+src/llama-sampling.o: \
+	src/llama-sampling.cpp \
+	src/llama-sampling.h \
+	src/llama-impl.h \
+	include/llama.h
+	$(CXX) $(CXXFLAGS) -c $< -o $@
+
 $(LIB_LLAMA): \
	$(OBJ_LLAMA) \
	$(LIB_GGML)
@@ -1258,11 +1294,6 @@ llama-cvector-generator: examples/cvector-generator/cvector-generator.cpp \
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
 
-llama-train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp \
-	$(OBJ_ALL)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
 llama-convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp \
	$(OBJ_GGML) $(OBJ_LLAMA)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
@@ -1278,13 +1309,8 @@ llama-baby-llama: examples/baby-llama/baby-llama.cpp \
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
 
-llama-finetune: examples/finetune/finetune.cpp \
-	$(OBJ_ALL)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
 llama-export-lora: examples/export-lora/export-lora.cpp \
-	$(OBJ_GGML) common/log.h
+	$(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
 
@@ -1431,7 +1457,7 @@ run-benchmark-matmult: llama-benchmark-matmult
 .PHONY: run-benchmark-matmult swift
 
 tests/test-llama-grammar: tests/test-llama-grammar.cpp \
-	$(OBJ_GGML) $(OBJ_COMMON) src/unicode.o src/unicode-data.o
+	$(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
 
@@ -1540,7 +1566,7 @@ llama-q8dot: pocs/vdot/q8dot.cpp ggml/src/ggml.o \
 # Deprecated binaries that we want to keep around long enough for people to migrate to the new filenames, then these can be removed.
 #
 # Mark legacy binary targets as .PHONY so that they are always checked.
-.PHONY: main quantize perplexity embedding server finetune
+.PHONY: main quantize perplexity embedding server
 
 # NOTE: We currently will always build the deprecation-warning `main` and `server` binaries to help users migrate.
 # Eventually we will want to remove these target from building all the time.
@@ -1583,13 +1609,3 @@ ifneq (,$(wildcard embedding))
	@echo " Remove the 'embedding' binary to remove this warning."
	@echo "#########"
 endif
-
-finetune: examples/deprecation-warning/deprecation-warning.cpp
-ifneq (,$(wildcard finetune))
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-	@echo "#########"
-	@echo "WARNING: The 'finetune' binary is deprecated. Please use 'llama-finetune' instead."
-	@echo " Remove the 'finetune' binary to remove this warning."
-	@echo "#########"
-endif
