
Commit 5481cec

Merge branch 'ggerganov:master' into gguf-model-template
2 parents: f455e82 + 87e397d


82 files changed (+13478 additions, −1441 deletions)

.devops/full-cuda.Dockerfile

Lines changed: 1 addition & 1 deletion
```diff
@@ -6,7 +6,7 @@ ARG CUDA_VERSION=11.7.1
 # Target the CUDA build image
 ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
 
-FROM ${BASE_CUDA_DEV_CONTAINER} as build
+FROM ${BASE_CUDA_DEV_CONTAINER} AS build
 
 # Unless otherwise specified, we make a fat build.
 ARG CUDA_DOCKER_ARCH=all
```
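
The only change in each of the .devops images below is the casing of the `as` keyword in multi-stage `FROM` instructions. Docker accepts either spelling, but newer BuildKit releases warn when the keyword's case does not match `FROM` (that motivation is inferred, not stated in the commit). A minimal multi-stage sketch of the preferred form, with a placeholder image and copied file:

```dockerfile
# Hypothetical minimal multi-stage build; the image tag and copied file are placeholders.
FROM ubuntu:22.04 AS build              # "AS" now matches the casing of "FROM"
RUN apt-get update && apt-get install -y build-essential

FROM ubuntu:22.04 AS runtime            # later stages reference the named stage
COPY --from=build /etc/os-release /build-stage-os-release
```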

.devops/full-rocm.Dockerfile

Lines changed: 1 addition & 1 deletion
```diff
@@ -6,7 +6,7 @@ ARG ROCM_VERSION=5.6
 # Target the CUDA build image
 ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
 
-FROM ${BASE_ROCM_DEV_CONTAINER} as build
+FROM ${BASE_ROCM_DEV_CONTAINER} AS build
 
 # Unless otherwise specified, we make a fat build.
 # List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
```

.devops/full.Dockerfile

Lines changed: 1 addition & 1 deletion
```diff
@@ -1,6 +1,6 @@
 ARG UBUNTU_VERSION=22.04
 
-FROM ubuntu:$UBUNTU_VERSION as build
+FROM ubuntu:$UBUNTU_VERSION AS build
 
 RUN apt-get update && \
     apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1
```

.devops/llama-cli-cuda.Dockerfile

Lines changed: 2 additions & 2 deletions
```diff
@@ -6,7 +6,7 @@ ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VER
 # Target the CUDA runtime image
 ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
 
-FROM ${BASE_CUDA_DEV_CONTAINER} as build
+FROM ${BASE_CUDA_DEV_CONTAINER} AS build
 
 # Unless otherwise specified, we make a fat build.
 ARG CUDA_DOCKER_ARCH=all
@@ -25,7 +25,7 @@ ENV GGML_CUDA=1
 
 RUN make -j$(nproc) llama-cli
 
-FROM ${BASE_CUDA_RUN_CONTAINER} as runtime
+FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
 
 RUN apt-get update && \
     apt-get install -y libgomp1
```

.devops/llama-cli-intel.Dockerfile

Lines changed: 2 additions & 2 deletions
```diff
@@ -1,6 +1,6 @@
 ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04
 
-FROM intel/oneapi-basekit:$ONEAPI_VERSION as build
+FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build
 
 ARG GGML_SYCL_F16=OFF
 RUN apt-get update && \
@@ -17,7 +17,7 @@ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
     cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
     cmake --build build --config Release --target llama-cli
 
-FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
+FROM intel/oneapi-basekit:$ONEAPI_VERSION AS runtime
 
 COPY --from=build /app/build/bin/llama-cli /llama-cli
 
```

.devops/llama-cli-rocm.Dockerfile

Lines changed: 1 addition & 1 deletion
```diff
@@ -6,7 +6,7 @@ ARG ROCM_VERSION=5.6
 # Target the CUDA build image
 ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
 
-FROM ${BASE_ROCM_DEV_CONTAINER} as build
+FROM ${BASE_ROCM_DEV_CONTAINER} AS build
 
 # Unless otherwise specified, we make a fat build.
 # List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
```

.devops/llama-cli-vulkan.Dockerfile

Lines changed: 1 addition & 1 deletion
```diff
@@ -1,6 +1,6 @@
 ARG UBUNTU_VERSION=jammy
 
-FROM ubuntu:$UBUNTU_VERSION as build
+FROM ubuntu:$UBUNTU_VERSION AS build
 
 # Install build tools
 RUN apt update && apt install -y git build-essential cmake wget libgomp1
```

.devops/llama-cli.Dockerfile

Lines changed: 2 additions & 2 deletions
```diff
@@ -1,6 +1,6 @@
 ARG UBUNTU_VERSION=22.04
 
-FROM ubuntu:$UBUNTU_VERSION as build
+FROM ubuntu:$UBUNTU_VERSION AS build
 
 RUN apt-get update && \
     apt-get install -y build-essential git
@@ -11,7 +11,7 @@ COPY . .
 
 RUN make -j$(nproc) llama-cli
 
-FROM ubuntu:$UBUNTU_VERSION as runtime
+FROM ubuntu:$UBUNTU_VERSION AS runtime
 
 RUN apt-get update && \
     apt-get install -y libgomp1
```

.devops/llama-server-cuda.Dockerfile

Lines changed: 2 additions & 2 deletions
```diff
@@ -6,7 +6,7 @@ ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VER
 # Target the CUDA runtime image
 ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
 
-FROM ${BASE_CUDA_DEV_CONTAINER} as build
+FROM ${BASE_CUDA_DEV_CONTAINER} AS build
 
 # Unless otherwise specified, we make a fat build.
 ARG CUDA_DOCKER_ARCH=all
@@ -27,7 +27,7 @@ ENV LLAMA_CURL=1
 
 RUN make -j$(nproc) llama-server
 
-FROM ${BASE_CUDA_RUN_CONTAINER} as runtime
+FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
 
 RUN apt-get update && \
     apt-get install -y libcurl4-openssl-dev libgomp1 curl
```

.devops/llama-server-intel.Dockerfile

Lines changed: 2 additions & 2 deletions
```diff
@@ -1,6 +1,6 @@
 ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04
 
-FROM intel/oneapi-basekit:$ONEAPI_VERSION as build
+FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build
 
 ARG GGML_SYCL_F16=OFF
 RUN apt-get update && \
@@ -17,7 +17,7 @@ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
     cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
     cmake --build build --config Release --target llama-server
 
-FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
+FROM intel/oneapi-basekit:$ONEAPI_VERSION AS runtime
 
 RUN apt-get update && \
     apt-get install -y libcurl4-openssl-dev curl
```

.devops/llama-server-rocm.Dockerfile

Lines changed: 1 addition & 1 deletion
```diff
@@ -6,7 +6,7 @@ ARG ROCM_VERSION=5.6
 # Target the CUDA build image
 ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
 
-FROM ${BASE_ROCM_DEV_CONTAINER} as build
+FROM ${BASE_ROCM_DEV_CONTAINER} AS build
 
 # Unless otherwise specified, we make a fat build.
 # List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
```

.devops/llama-server-vulkan.Dockerfile

Lines changed: 1 addition & 1 deletion
```diff
@@ -1,6 +1,6 @@
 ARG UBUNTU_VERSION=jammy
 
-FROM ubuntu:$UBUNTU_VERSION as build
+FROM ubuntu:$UBUNTU_VERSION AS build
 
 # Install build tools
 RUN apt update && apt install -y git build-essential cmake wget
```

.devops/llama-server.Dockerfile

Lines changed: 2 additions & 2 deletions
```diff
@@ -1,6 +1,6 @@
 ARG UBUNTU_VERSION=22.04
 
-FROM ubuntu:$UBUNTU_VERSION as build
+FROM ubuntu:$UBUNTU_VERSION AS build
 
 RUN apt-get update && \
     apt-get install -y build-essential git libcurl4-openssl-dev curl
@@ -13,7 +13,7 @@ ENV LLAMA_CURL=1
 
 RUN make -j$(nproc) llama-server
 
-FROM ubuntu:$UBUNTU_VERSION as runtime
+FROM ubuntu:$UBUNTU_VERSION AS runtime
 
 RUN apt-get update && \
     apt-get install -y libcurl4-openssl-dev libgomp1
```

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
```diff
@@ -106,6 +106,7 @@ llama_option_depr(WARNING LLAMA_NATIVE GGML_NATIVE)
 llama_option_depr(WARNING LLAMA_RPC GGML_RPC)
 llama_option_depr(WARNING LLAMA_SYCL GGML_SYCL)
 llama_option_depr(WARNING LLAMA_SYCL_F16 GGML_SYCL_F16)
+llama_option_depr(WARNING LLAMA_CANN GGML_CANN)
 
 #
 # build the library
```
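
The new `llama_option_depr` entry lets builds that still pass the old `LLAMA_CANN` option keep configuring while warning that the flag has been renamed to `GGML_CANN`. A hedged configure sketch using only the flag names visible in the diff:

```sh
# Preferred spelling after this change:
cmake -B build -DGGML_CANN=ON

# The old spelling still configures, but llama_option_depr(WARNING ...) should
# now print a deprecation notice pointing at GGML_CANN (behaviour inferred).
cmake -B build -DLLAMA_CANN=ON
```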

CONTRIBUTING.md

Lines changed: 0 additions & 1 deletion
```diff
@@ -5,7 +5,6 @@
 - Test your changes:
   - Using the commands in the [`tests`](tests) folder. For instance, running the `./tests/test-backend-ops` command tests different backend implementations of the GGML library
   - Execute [the full CI locally on your machine](ci/README.md) before publishing
-- If the pull request contains only documentation changes (e.g., updating READMEs, adding new wiki pages), please add `[no ci]` to the commit title. This will skip unnecessary CI checks and help reduce build times
 - Please rate the complexity of your PR (i.e. `Review Complexity : Low`, `Review Complexity : Medium`, `Review Complexity : High`). This makes it easier for maintainers to triage the PRs.
 - The PR template has a series of review complexity checkboxes `[ ]` that [you can mark as](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/about-task-lists) `[X]` for your conveience
 
```

Makefile

Lines changed: 8 additions & 0 deletions
```diff
@@ -795,6 +795,14 @@ ifdef GGML_CUDA_FORCE_DMMV
     HIPFLAGS += -DGGML_CUDA_FORCE_DMMV
 endif # GGML_CUDA_FORCE_DMMV
 
+ifdef GGML_CUDA_FORCE_MMQ
+    HIPFLAGS += -DGGML_CUDA_FORCE_MMQ
+endif # GGML_CUDA_FORCE_MMQ
+
+ifdef GGML_CUDA_FORCE_CUBLAS
+    HIPFLAGS += -DGGML_CUDA_FORCE_CUBLAS
+endif # GGML_CUDA_FORCE_CUBLAS
+
 ifdef GGML_CUDA_NO_PEER_COPY
     HIPFLAGS += -DGGML_CUDA_NO_PEER_COPY
 endif # GGML_CUDA_NO_PEER_COPY
```
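
These two blocks mirror the existing CUDA handling for the HIP/ROCm path, so `GGML_CUDA_FORCE_MMQ` and `GGML_CUDA_FORCE_CUBLAS` now reach the HIP compiler as preprocessor defines. A hedged invocation sketch; the variable that enables the HIP backend (`GGML_HIPBLAS=1`) is assumed from the surrounding Makefile and is not part of this diff:

```sh
# Assumed example: force the MMQ kernels in a ROCm build.
# The make variable below is appended to HIPFLAGS as -DGGML_CUDA_FORCE_MMQ.
make GGML_HIPBLAS=1 GGML_CUDA_FORCE_MMQ=1 -j"$(nproc)" llama-cli
```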

README.md

Lines changed: 1 addition & 1 deletion
```diff
@@ -3,7 +3,7 @@
 ![llama](https://user-images.githubusercontent.com/1991296/230134379-7181e485-c521-4d23-a0d6-f7b3b61ba524.png)
 
 [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
-[![Server](https://github.com/ggerganov/llama.cpp/actions/workflows/server.yml/badge.svg?branch=master&event=schedule)](https://github.com/ggerganov/llama.cpp/actions/workflows/server.yml)
+[![Server](https://github.com/ggerganov/llama.cpp/actions/workflows/server.yml/badge.svg)](https://github.com/ggerganov/llama.cpp/actions/workflows/server.yml)
 [![Conan Center](https://shields.io/conan/v/llama-cpp)](https://conan.io/center/llama-cpp)
 
 [Roadmap](https://github.com/users/ggerganov/projects/7) / [Project status](https://github.com/ggerganov/llama.cpp/discussions/3471) / [Manifesto](https://github.com/ggerganov/llama.cpp/discussions/205) / [ggml](https://github.com/ggerganov/ggml)
```

common/common.cpp

Lines changed: 3 additions & 10 deletions
```diff
@@ -685,15 +685,13 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
     if (arg == "--lora") {
         CHECK_ARG
         params.lora_adapter.emplace_back(argv[i], 1.0f);
-        params.use_mmap = false;
         return true;
     }
     if (arg == "--lora-scaled") {
         CHECK_ARG
         const char* lora_adapter = argv[i];
         CHECK_ARG
         params.lora_adapter.emplace_back(lora_adapter, std::stof(argv[i]));
-        params.use_mmap = false;
         return true;
     }
     if (arg == "--lora-base") {
@@ -2089,19 +2087,14 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
     for (unsigned int i = 0; i < params.lora_adapter.size(); ++i) {
         const std::string & lora_adapter = std::get<0>(params.lora_adapter[i]);
         float lora_scale = std::get<1>(params.lora_adapter[i]);
-        int err = llama_model_apply_lora_from_file(model,
-                                                   lora_adapter.c_str(),
-                                                   lora_scale,
-                                                   ((i > 0) || params.lora_base.empty())
-                                                       ? NULL
-                                                       : params.lora_base.c_str(),
-                                                   params.n_threads);
-        if (err != 0) {
+        auto adapter = llama_lora_adapter_init(model, lora_adapter.c_str());
+        if (adapter == nullptr) {
             fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__);
             llama_free(lctx);
             llama_free_model(model);
             return std::make_tuple(nullptr, nullptr);
         }
+        llama_lora_adapter_set(lctx, adapter, lora_scale);
     }
 
     if (params.ignore_eos) {
```
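
The multi-argument `llama_model_apply_lora_from_file` call (which also consumed `--lora-base` and a thread count, and previously forced `use_mmap` off whenever a LoRA was given) is replaced by the two-step adapter API: load the adapter once per model with `llama_lora_adapter_init`, then attach it to a context at a chosen scale with `llama_lora_adapter_set`. A minimal sketch of that flow, using only the calls visible in this diff; the wrapper function, file path, and error handling are illustrative assumptions:

```cpp
#include "llama.h"
#include <cstdio>

// Hedged sketch of the new LoRA flow: init once per model, then attach to a
// context with a scale. The path and scale here are placeholders.
static bool apply_lora_sketch(llama_model * model, llama_context * ctx,
                              const char * lora_path, float scale) {
    llama_lora_adapter * adapter = llama_lora_adapter_init(model, lora_path);
    if (adapter == nullptr) {
        fprintf(stderr, "failed to load LoRA adapter from %s\n", lora_path);
        return false;
    }
    // scale 1.0f corresponds to a plain --lora argument in the parsing code above
    llama_lora_adapter_set(ctx, adapter, scale);
    return true;
}
```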

common/ngram-cache.h

Lines changed: 10 additions & 3 deletions
```diff
@@ -37,11 +37,18 @@ struct llama_ngram {
     }
 };
 
+struct llama_token_hash_function {
+    size_t operator()(const llama_token token) const {
+        // see https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
+        return token * 11400714819323198485llu;
+    }
+};
+
 struct llama_ngram_hash_function {
     size_t operator()(const llama_ngram & ngram) const {
-        size_t hash = 0;
-        for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
-            hash ^= std::hash<llama_token>{}(ngram.tokens[i]);
+        size_t hash = llama_token_hash_function{}(ngram.tokens[0]);
+        for (int i = 1; i < LLAMA_NGRAM_MAX; ++i) {
+            hash ^= llama_token_hash_function{}(ngram.tokens[i]);
         }
         return hash;
     }
```
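
The n-gram hash no longer relies on `std::hash<llama_token>` (which is commonly the identity function for integers) but uses Fibonacci hashing: each token id is multiplied by 11400714819323198485, roughly 2^64 divided by the golden ratio, so nearby ids are spread across the full 64-bit range before being XOR-combined; the combined hash is also seeded with the first token instead of 0. A self-contained sketch of the same scheme; the token type and `LLAMA_NGRAM_MAX` value are stand-ins for the real headers:

```cpp
#include <cstdint>
#include <cstdio>

// Stand-ins for the definitions in llama.h / ngram-cache.h.
using llama_token = int32_t;
constexpr int LLAMA_NGRAM_MAX = 4;

// Fibonacci hashing: multiply by ~2^64/phi so consecutive ids land far apart.
static size_t token_hash(llama_token token) {
    return (size_t) token * 11400714819323198485llu;
}

static size_t ngram_hash(const llama_token (&tokens)[LLAMA_NGRAM_MAX]) {
    size_t hash = token_hash(tokens[0]);   // seed with the first token
    for (int i = 1; i < LLAMA_NGRAM_MAX; ++i) {
        hash ^= token_hash(tokens[i]);     // combine the rest by XOR
    }
    return hash;
}

int main() {
    // Arbitrary placeholder token ids.
    const llama_token ngram[LLAMA_NGRAM_MAX] = {15043, 29892, 3186, 29991};
    printf("hash = %zu\n", ngram_hash(ngram));
    return 0;
}
```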
