
Commit e6c10cf

talk-llama : sync llama.cpp
ggml-ci
Parent: d65a579

28 files changed: +2517 -1734 lines

examples/talk-llama/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
```diff
@@ -18,7 +18,8 @@ if (WHISPER_SDL2)
         llama-io.cpp
         llama-kv-cache-unified.cpp
         llama-kv-cache-unified-iswa.cpp
-        llama-kv-cache-recurrent.cpp
+        llama-memory-recurrent.cpp
+        llama-memory-hybrid.cpp
         llama-memory.cpp
         llama-mmap.cpp
         llama-model-loader.cpp
```
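The swapped source files mirror upstream llama.cpp, where the dedicated recurrent KV-cache implementation was folded into a more general memory abstraction: llama-memory-recurrent.cpp holds the per-sequence state used by recurrent models, and the new llama-memory-hybrid.cpp is scaffolding for models that mix attention layers with recurrent ones (see the llm_arch_is_hybrid() helper added below).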

examples/talk-llama/llama-arch.cpp

Lines changed: 24 additions & 0 deletions
```diff
@@ -147,6 +147,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_ATTENTION_SCALE,                "%s.attention.scale" },
     { LLM_KV_ATTENTION_KEY_LENGTH_MLA,       "%s.attention.key_length_mla" },
     { LLM_KV_ATTENTION_VALUE_LENGTH_MLA,     "%s.attention.value_length_mla" },
+    { LLM_KV_ATTENTION_LAYER_INDICES,        "%s.attention.layer_indices" },
 
     { LLM_KV_ROPE_DIMENSION_COUNT,           "%s.rope.dimension_count" },
     { LLM_KV_ROPE_DIMENSION_SECTIONS,        "%s.rope.dimension_sections" },
@@ -197,6 +198,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_TOKENIZER_MASK_ID,              "tokenizer.ggml.mask_token_id" },
     { LLM_KV_TOKENIZER_ADD_BOS,              "tokenizer.ggml.add_bos_token" },
     { LLM_KV_TOKENIZER_ADD_EOS,              "tokenizer.ggml.add_eos_token" },
+    { LLM_KV_TOKENIZER_ADD_SEP,              "tokenizer.ggml.add_sep_token" },
     { LLM_KV_TOKENIZER_ADD_PREFIX,           "tokenizer.ggml.add_space_prefix" },
     { LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,      "tokenizer.ggml.remove_extra_whitespaces" },
     { LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, "tokenizer.ggml.precompiled_charsmap" },
@@ -1816,3 +1818,25 @@ llm_arch llm_arch_from_string(const std::string & name) {
 const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor) {
     return LLM_TENSOR_INFOS.at(tensor);
 }
+
+bool llm_arch_is_recurrent(const llm_arch & arch) {
+    switch (arch) {
+        case LLM_ARCH_MAMBA:
+        case LLM_ARCH_RWKV6:
+        case LLM_ARCH_RWKV6QWEN2:
+        case LLM_ARCH_RWKV7:
+        case LLM_ARCH_ARWKV7:
+            return true;
+        default:
+            return false;
+    }
+}
+
+bool llm_arch_is_hybrid(const llm_arch & arch) {
+    // TODO: There are currently no hybrid models! Once there are, this will be
+    // the place to identify them
+    switch (arch) {
+        default:
+            return false;
+    }
+}
```
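The two predicates added at the end of llama-arch.cpp give callers a single place to ask whether an architecture keeps recurrent state. Below is a minimal, self-contained sketch of how such predicates are typically consumed; the enum subset and the choose_memory_type() helper are hypothetical stand-ins for illustration, not llama.cpp's actual API:

```cpp
#include <cstdio>

enum llm_arch {
    LLM_ARCH_LLAMA,
    LLM_ARCH_MAMBA,
    LLM_ARCH_RWKV7,
};

static bool llm_arch_is_recurrent(const llm_arch & arch) {
    switch (arch) {
        case LLM_ARCH_MAMBA:
        case LLM_ARCH_RWKV7:
            return true;
        default:
            return false;
    }
}

static bool llm_arch_is_hybrid(const llm_arch & /*arch*/) {
    return false; // mirrors the commit: no hybrid architectures identified yet
}

// Hypothetical dispatch: recurrent models keep fixed-size per-sequence state
// instead of a token-indexed KV cache; a hybrid model would need both.
static const char * choose_memory_type(llm_arch arch) {
    if (llm_arch_is_hybrid(arch))    return "memory-hybrid";
    if (llm_arch_is_recurrent(arch)) return "memory-recurrent";
    return "kv-cache-unified";
}

int main() {
    std::printf("%s\n", choose_memory_type(LLM_ARCH_MAMBA)); // memory-recurrent
    std::printf("%s\n", choose_memory_type(LLM_ARCH_LLAMA)); // kv-cache-unified
}
```

Centralizing the switch in one translation unit means supporting a new recurrent model only requires adding a case to llm_arch_is_recurrent(); every call site picks up the change.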

examples/talk-llama/llama-arch.h

Lines changed: 5 additions & 0 deletions
```diff
@@ -151,6 +151,7 @@ enum llm_kv {
     LLM_KV_ATTENTION_SCALE,
     LLM_KV_ATTENTION_KEY_LENGTH_MLA,
     LLM_KV_ATTENTION_VALUE_LENGTH_MLA,
+    LLM_KV_ATTENTION_LAYER_INDICES,
 
     LLM_KV_ROPE_DIMENSION_COUNT,
     LLM_KV_ROPE_DIMENSION_SECTIONS,
@@ -193,6 +194,7 @@ enum llm_kv {
     LLM_KV_TOKENIZER_MASK_ID,
     LLM_KV_TOKENIZER_ADD_BOS,
     LLM_KV_TOKENIZER_ADD_EOS,
+    LLM_KV_TOKENIZER_ADD_SEP,
     LLM_KV_TOKENIZER_ADD_PREFIX,
     LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,
     LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP,
@@ -439,3 +441,6 @@ const char * llm_arch_name(llm_arch arch);
 llm_arch llm_arch_from_string(const std::string & name);
 
 const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor);
+
+bool llm_arch_is_recurrent(const llm_arch & arch);
+bool llm_arch_is_hybrid   (const llm_arch & arch);
```
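The header declarations mirror the definitions above. The new LLM_KV_* enum entries resolve to per-architecture GGUF key names through the "%s" format strings in LLM_KV_NAMES (llama-arch.cpp), while tokenizer.ggml.add_sep_token follows the same pattern as a global key with no "%s" prefix. A small sketch of that expansion, where llm_kv_name() is a hypothetical stand-in for llama.cpp's internal formatting helper:

```cpp
#include <cstdio>
#include <string>

// Expands the "%s" placeholder in an LLM_KV_NAMES-style format string
// to the architecture name. Illustrative only.
static std::string llm_kv_name(const char * fmt, const char * arch_name) {
    char buf[256];
    std::snprintf(buf, sizeof(buf), fmt, arch_name);
    return buf;
}

int main() {
    // The key added by this commit, expanded for the "llama" architecture:
    std::printf("%s\n", llm_kv_name("%s.attention.layer_indices", "llama").c_str());
    // prints: llama.attention.layer_indices
}
```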
