Skip to content

Commit a37da2e

Browse files
Jun-Howie and qinxuye authored
ENH: Support GLM4-0414 MLX and GGUF (#3325)
Co-authored-by: qinxuye <[email protected]>
1 parent 452dc9b commit a37da2e

File tree

4 files changed

+192
-4
lines changed

4 files changed

+192
-4
lines changed

xinference/model/llm/llm_family.json

+93-1
Original file line numberDiff line numberDiff line change
@@ -11075,6 +11075,91 @@
1107511075
"none"
1107611076
],
1107711077
"model_id": "THUDM/GLM-4-32B-0414"
11078+
},
11079+
{
11080+
"model_format": "mlx",
11081+
"model_size_in_billions": 9,
11082+
"quantizations": [
11083+
"4bit",
11084+
"6bit",
11085+
"8bit",
11086+
"bf16"
11087+
],
11088+
"model_id": "mlx-community/GLM-4-9B-0414-{quantization}"
11089+
},
11090+
{
11091+
"model_format": "mlx",
11092+
"model_size_in_billions": 32,
11093+
"quantizations": [
11094+
"4bit",
11095+
"8bit"
11096+
],
11097+
"model_id": "mlx-community/GLM-4-32B-0414-{quantization}"
11098+
},
11099+
{
11100+
"model_format": "ggufv2",
11101+
"model_size_in_billions": 9,
11102+
"quantizations": [
11103+
"IQ2_M",
11104+
"IQ3_M",
11105+
"IQ3_XS",
11106+
"IQ3_XXS",
11107+
"IQ4_NL",
11108+
"IQ4_XS",
11109+
"Q2_K",
11110+
"Q2_K_L",
11111+
"Q3_K_L",
11112+
"Q3_K_M",
11113+
"Q3_K_S",
11114+
"Q3_K_XL",
11115+
"Q4_0",
11116+
"Q4_1",
11117+
"Q4_K_L",
11118+
"Q4_K_M",
11119+
"Q4_K_S",
11120+
"Q5_K_L",
11121+
"Q5_K_M",
11122+
"Q5_K_S",
11123+
"Q6_K",
11124+
"Q6_K_L",
11125+
"Q8_0",
11126+
"bf16"
11127+
],
11128+
"model_id": "bartowski/THUDM_GLM-4-9B-0414-GGUF",
11129+
"model_file_name_template": "THUDM_GLM-4-9B-0414-{quantization}.gguf"
11130+
},
11131+
{
11132+
"model_format": "ggufv2",
11133+
"model_size_in_billions": 32,
11134+
"quantizations": [
11135+
"IQ2_M",
11136+
"IQ2_S",
11137+
"IQ2_XS",
11138+
"IQ3_M",
11139+
"IQ3_XS",
11140+
"IQ3_XXS",
11141+
"IQ4_NL",
11142+
"IQ4_XS",
11143+
"Q2_K",
11144+
"Q2_K_L",
11145+
"Q3_K_L",
11146+
"Q3_K_M",
11147+
"Q3_K_S",
11148+
"Q3_K_XL",
11149+
"Q4_0",
11150+
"Q4_1",
11151+
"Q4_K_L",
11152+
"Q4_K_M",
11153+
"Q4_K_S",
11154+
"Q5_K_L",
11155+
"Q5_K_M",
11156+
"Q5_K_S",
11157+
"Q6_K",
11158+
"Q6_K_L",
11159+
"Q8_0"
11160+
],
11161+
"model_id": "bartowski/THUDM_GLM-4-32B-0414-GGUF",
11162+
"model_file_name_template": "THUDM_GLM-4-32B-0414-{quantization}.gguf"
1107811163
}
1107911164
],
1108011165
"chat_template": "[gMASK]<sop>{%- if tools -%}<|system|>\n# 可用工具\n{% for tool in tools %}{%- set function = tool.function if tool.get(\"function\") else tool %}\n\n## {{ function.name }}\n\n{{ function | tojson(indent=4, ensure_ascii=False) }}\n在调用上述函数时,请使用 Json 格式表示调用的参数。{%- endfor %}{%- endif -%}{%- for msg in messages %}{%- if msg.role == 'system' %}<|system|>\n{{ msg.content }}{%- endif %}{%- endfor %}{%- for message in messages if message.role != 'system' %}{%- set role = message['role'] %}{%- set content = message['content'] %}{%- set meta = message.get(\"metadata\", \"\") %}{%- if role == 'user' %}<|user|>\n{{ content }}{%- elif role == 'assistant' and not meta %}<|assistant|>\n{{ content }}{%- elif role == 'assistant' and meta %}<|assistant|>{{ meta }} \n{{ content }}{%- elif role == 'observation' %}<|observation|>\n{{ content }}{%- endif %}{%- endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}",
@@ -11087,7 +11172,14 @@
1108711172
"<|endoftext|>",
1108811173
"<|user|>",
1108911174
"<|observation|>"
11090-
]
11175+
],
11176+
"virtualenv": {
11177+
"packages": [
11178+
"transformers>=4.51.3",
11179+
"mlx-lm>=0.23.1 ; sys_platform=='darwin'",
11180+
"numpy==1.26.4"
11181+
]
11182+
}
1109111183
},
1109211184
{
1109311185
"version": 1,

xinference/model/llm/llm_family_modelscope.json

+97-1
Original file line numberDiff line numberDiff line change
@@ -8858,6 +8858,95 @@
88588858
],
88598859
"model_id": "ZhipuAI/GLM-4-32B-0414",
88608860
"model_hub": "modelscope"
8861+
},
8862+
{
8863+
"model_format": "mlx",
8864+
"model_size_in_billions": 9,
8865+
"quantizations": [
8866+
"4bit",
8867+
"6bit",
8868+
"8bit",
8869+
"bf16"
8870+
],
8871+
"model_id": "mlx-community/GLM-4-9B-0414-{quantization}",
8872+
"model_hub": "modelscope"
8873+
},
8874+
{
8875+
"model_format": "mlx",
8876+
"model_size_in_billions": 32,
8877+
"quantizations": [
8878+
"4bit",
8879+
"8bit"
8880+
],
8881+
"model_id": "mlx-community/GLM-4-32B-0414-{quantization}",
8882+
"model_hub": "modelscope"
8883+
},
8884+
{
8885+
"model_format": "ggufv2",
8886+
"model_size_in_billions": 9,
8887+
"quantizations": [
8888+
"IQ2_M",
8889+
"IQ3_M",
8890+
"IQ3_XS",
8891+
"IQ3_XXS",
8892+
"IQ4_NL",
8893+
"IQ4_XS",
8894+
"Q2_K",
8895+
"Q2_K_L",
8896+
"Q3_K_L",
8897+
"Q3_K_M",
8898+
"Q3_K_S",
8899+
"Q3_K_XL",
8900+
"Q4_0",
8901+
"Q4_1",
8902+
"Q4_K_L",
8903+
"Q4_K_M",
8904+
"Q4_K_S",
8905+
"Q5_K_L",
8906+
"Q5_K_M",
8907+
"Q5_K_S",
8908+
"Q6_K",
8909+
"Q6_K_L",
8910+
"Q8_0",
8911+
"bf16"
8912+
],
8913+
"model_id": "bartowski/THUDM_GLM-4-9B-0414-GGUF",
8914+
"model_file_name_template": "THUDM_GLM-4-9B-0414-{quantization}.gguf",
8915+
"model_hub": "modelscope"
8916+
},
8917+
{
8918+
"model_format": "ggufv2",
8919+
"model_size_in_billions": 32,
8920+
"quantizations": [
8921+
"IQ2_M",
8922+
"IQ2_S",
8923+
"IQ2_XS",
8924+
"IQ3_M",
8925+
"IQ3_XS",
8926+
"IQ3_XXS",
8927+
"IQ4_NL",
8928+
"IQ4_XS",
8929+
"Q2_K",
8930+
"Q2_K_L",
8931+
"Q3_K_L",
8932+
"Q3_K_M",
8933+
"Q3_K_S",
8934+
"Q3_K_XL",
8935+
"Q4_0",
8936+
"Q4_1",
8937+
"Q4_K_L",
8938+
"Q4_K_M",
8939+
"Q4_K_S",
8940+
"Q5_K_L",
8941+
"Q5_K_M",
8942+
"Q5_K_S",
8943+
"Q6_K",
8944+
"Q6_K_L",
8945+
"Q8_0"
8946+
],
8947+
"model_id": "bartowski/THUDM_GLM-4-32B-0414-GGUF",
8948+
"model_file_name_template": "THUDM_GLM-4-32B-0414-{quantization}.gguf",
8949+
"model_hub": "modelscope"
88618950
}
88628951
],
88638952
"chat_template": "[gMASK]<sop>{%- if tools -%}<|system|>\n# 可用工具\n{% for tool in tools %}{%- set function = tool.function if tool.get(\"function\") else tool %}\n\n## {{ function.name }}\n\n{{ function | tojson(indent=4, ensure_ascii=False) }}\n在调用上述函数时,请使用 Json 格式表示调用的参数。{%- endfor %}{%- endif -%}{%- for msg in messages %}{%- if msg.role == 'system' %}<|system|>\n{{ msg.content }}{%- endif %}{%- endfor %}{%- for message in messages if message.role != 'system' %}{%- set role = message['role'] %}{%- set content = message['content'] %}{%- set meta = message.get(\"metadata\", \"\") %}{%- if role == 'user' %}<|user|>\n{{ content }}{%- elif role == 'assistant' and not meta %}<|assistant|>\n{{ content }}{%- elif role == 'assistant' and meta %}<|assistant|>{{ meta }} \n{{ content }}{%- elif role == 'observation' %}<|observation|>\n{{ content }}{%- endif %}{%- endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}",
@@ -8870,7 +8959,14 @@
88708959
"<|endoftext|>",
88718960
"<|user|>",
88728961
"<|observation|>"
8873-
]
8962+
],
8963+
"virtualenv": {
8964+
"packages": [
8965+
"transformers>=4.51.3",
8966+
"mlx-lm>=0.23.1 ; sys_platform=='darwin'",
8967+
"numpy==1.26.4"
8968+
]
8969+
}
88748970
},
88758971
{
88768972
"version": 1,

xinference/model/llm/transformers/deepseek_vl.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def match_json(
4646
cls, model_family: "LLMFamilyV1", model_spec: "LLMSpecV1", quantization: str
4747
) -> bool:
4848
llm_family = model_family.model_family or model_family.model_name
49-
if "deepseek-vl" == llm_family.lower():
49+
if "deepseek-vl-chat" == llm_family.lower():
5050
return True
5151
return False
5252

xinference/web/ui/src/scenes/launch_model/data/data.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ export const featureModels = [
6868
'deepseek-r1-distill-llama',
6969
'qwen2.5-instruct',
7070
'qwen2.5-vl-instruct',
71-
'qwen2.5-coder-instruct',
71+
'glm4-0414',
7272
'QwQ-32B',
7373
'llama-3.1-instruct',
7474
'gemma-3-it',

0 commit comments

Comments
 (0)