Commit 3f96aef

llama : one-off chat template fix for Mistral-Small-2503 (#13398)
* llama : one-off chat template fix for Mistral-Small-2503
* update readme
* add mistral-v7-tekken
1 parent b486ba0 commit 3f96aef

4 files changed (+18, -7 lines)

src/llama-chat.cpp

Lines changed: 8 additions & 6 deletions
```diff
@@ -35,6 +35,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
     { "mistral-v3",        LLM_CHAT_TEMPLATE_MISTRAL_V3        },
     { "mistral-v3-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN },
     { "mistral-v7",        LLM_CHAT_TEMPLATE_MISTRAL_V7        },
+    { "mistral-v7-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN },
     { "phi3",              LLM_CHAT_TEMPLATE_PHI_3             },
     { "phi4",              LLM_CHAT_TEMPLATE_PHI_4             },
     { "falcon3",           LLM_CHAT_TEMPLATE_FALCON_3          },
@@ -202,19 +203,20 @@ int32_t llm_chat_apply_template(
         if (add_ass) {
             ss << "<|im_start|>assistant\n";
         }
-    } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7) {
+    } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN) {
         // Official mistral 'v7' template
         // See: https://huggingface.co/mistralai/Mistral-Large-Instruct-2411#basic-instruct-template-v7
+        // https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503#basic-instruct-template-v7-tekken
+        const char * trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 ? " " : "";
         for (auto message : chat) {
             std::string role(message->role);
             std::string content(message->content);
             if (role == "system") {
-                ss << "[SYSTEM_PROMPT] " << content << "[/SYSTEM_PROMPT]";
+                ss << "[SYSTEM_PROMPT]" << trailing_space << content << "[/SYSTEM_PROMPT]";
             } else if (role == "user") {
-                ss << "[INST] " << content << "[/INST]";
-            }
-            else {
-                ss << " " << content << "</s>";
+                ss << "[INST]" << trailing_space << content << "[/INST]";
+            } else {
+                ss << trailing_space << content << "</s>";
             }
         }
     } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1
```
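
To make the whitespace change concrete, here is a minimal standalone sketch that mirrors the formatting logic of the hunk above; the `msg` struct and `render` helper are illustrative only, not llama.cpp code:

```cpp
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

struct msg { std::string role, content; };

static std::string render(const std::vector<msg> & chat, bool tekken) {
    // v7 keeps a space after "[SYSTEM_PROMPT]"/"[INST]"; v7-tekken drops it
    const char * trailing_space = tekken ? "" : " ";
    std::ostringstream ss;
    for (const auto & m : chat) {
        if (m.role == "system") {
            ss << "[SYSTEM_PROMPT]" << trailing_space << m.content << "[/SYSTEM_PROMPT]";
        } else if (m.role == "user") {
            ss << "[INST]" << trailing_space << m.content << "[/INST]";
        } else {
            ss << trailing_space << m.content << "</s>";
        }
    }
    return ss.str();
}

int main() {
    const std::vector<msg> chat = {
        { "system",    "You are a helpful assistant." },
        { "user",      "Hello" },
        { "assistant", "Hi there" },
    };
    std::cout << "v7       : " << render(chat, /*tekken=*/false) << "\n";
    std::cout << "v7-tekken: " << render(chat, /*tekken=*/true)  << "\n";
}
```

The only difference in the rendered output is the single space that v7 emits after `[SYSTEM_PROMPT]` and `[INST]` and before each assistant message; v7-tekken omits it.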

src/llama-chat.h

Lines changed: 1 addition & 0 deletions
```diff
@@ -14,6 +14,7 @@ enum llm_chat_template {
     LLM_CHAT_TEMPLATE_MISTRAL_V3,
     LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN,
     LLM_CHAT_TEMPLATE_MISTRAL_V7,
+    LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN,
     LLM_CHAT_TEMPLATE_PHI_3,
     LLM_CHAT_TEMPLATE_PHI_4,
     LLM_CHAT_TEMPLATE_FALCON_3,
```

src/llama-model.cpp

Lines changed: 8 additions & 0 deletions
```diff
@@ -13387,6 +13387,14 @@ const char * llama_model_chat_template(const llama_model * model, const char * n
         : LLM_KV(model->arch)(LLM_KV_TOKENIZER_CHAT_TEMPLATE);
     const auto & it = model->gguf_kv.find(key);
     if (it == model->gguf_kv.end()) {
+        // one-off fix for very popular models (so we are not flooded with issues)
+        // do not extend this list unless absolutely necessary
+        // Mistral-Small-2503 does not have built-in chat template
+        llama_vocab_pre_type pre_type = model->vocab.get_pre_type();
+        if (pre_type == LLAMA_VOCAB_PRE_TYPE_TEKKEN && model->layers.size() == 40) {
+            return "mistral-v7-tekken";
+        }
+
         return nullptr;
     }
```
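
For callers, the observable change is that `llama_model_chat_template` now returns the `mistral-v7-tekken` alias instead of `nullptr` for this model. Below is a hedged sketch of that flow, assuming the public API in `llama.h` (`llama_model_load_from_file`, `llama_model_chat_template`, `llama_chat_apply_template`); the GGUF path is a placeholder and error handling is minimal:

```cpp
#include <cstdio>
#include <vector>

#include "llama.h"

int main() {
    llama_model_params mparams = llama_model_default_params();
    // placeholder path: a Mistral-Small-2503 GGUF that ships without a chat template
    llama_model * model = llama_model_load_from_file("mistral-small-2503.gguf", mparams);
    if (model == nullptr) {
        return 1;
    }

    // before this commit: nullptr for Mistral-Small-2503; now: the fallback alias
    const char * tmpl = llama_model_chat_template(model, /*name=*/nullptr);
    std::printf("template: %s\n", tmpl ? tmpl : "(none)");

    if (tmpl != nullptr) {
        llama_chat_message chat[] = {
            { "system", "You are a helpful assistant." },
            { "user",   "Hello" },
        };
        std::vector<char> buf(4096);
        const int32_t n = llama_chat_apply_template(tmpl, chat, 2, /*add_ass=*/true,
                                                    buf.data(), (int32_t) buf.size());
        if (n > 0) {
            std::printf("%.*s\n", n, buf.data());
        }
    }

    llama_model_free(model);
    return 0;
}
```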

tools/mtmd/README.md

Lines changed: 1 addition & 1 deletion
````diff
@@ -46,7 +46,7 @@ llama-mtmd-cli -hf ggml-org/Qwen2.5-VL-32B-Instruct-GGUF
 llama-mtmd-cli -hf ggml-org/Qwen2.5-VL-72B-Instruct-GGUF
 
 # Mistral Small 3.1 24B (IQ2_M quantization)
-llama-mtmd-cli -hf ggml-org/Mistral-Small-3.1-24B-Instruct-2503-GGUF --chat-template mistral-v7
+llama-mtmd-cli -hf ggml-org/Mistral-Small-3.1-24B-Instruct-2503-GGUF
 ```
 
 ## How it works and what is `mmproj`?
````
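
With the loader fallback in `src/llama-model.cpp` above, the explicit `--chat-template mistral-v7` override is no longer needed: when this GGUF ships without a built-in template, the loader now reports `mistral-v7-tekken` automatically. The alias differs from `mistral-v7` only in omitting the single space that v7 inserts after `[SYSTEM_PROMPT]`/`[INST]` and before each assistant message.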
