Commit 2b796a9

FEAT: ovis2 (#3170)

1 parent aa4d536
5 files changed: +544 −0 lines

5 files changed

+544
-0
lines changed

xinference/model/llm/__init__.py  (+2)

@@ -151,6 +151,7 @@ def _install():
     from .transformers.minicpmv25 import MiniCPMV25Model
     from .transformers.minicpmv26 import MiniCPMV26Model
     from .transformers.opt import OptPytorchModel
+    from .transformers.ovis2 import Ovis2ChatModel
     from .transformers.qwen2_audio import Qwen2AudioChatModel
     from .transformers.qwen_vl import QwenVLChatModel
     from .transformers.yi_vl import YiVLChatModel
@@ -199,6 +200,7 @@ def _install():
             CogAgentChatModel,
             Gemma3TextChatModel,
             Gemma3ChatModel,
+            Ovis2ChatModel,
         ]
     )
     if OmniLMMModel:  # type: ignore
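The second hunk appends Ovis2ChatModel to the class list that _install() registers with the transformers backend; dispatch then works by asking each registered class whether it matches a requested model family. Below is a minimal, self-contained sketch of that register-then-match pattern; the registry name and match() signature are illustrative simplifications, not xinference's actual internals.

# Illustrative sketch of the register-then-match dispatch used by
# _install(); names and signatures here are hypothetical simplifications.
MODEL_CLASSES: list = []

class Ovis2ChatModel:
    @classmethod
    def match(cls, model_family: str) -> bool:
        # The real class inspects the full family spec; "Ovis2" is the
        # model_name registered in llm_family.json below.
        return model_family == "Ovis2"

def _install() -> None:
    # Appending the class is all the registration this pattern needs.
    MODEL_CLASSES.extend([Ovis2ChatModel])

def resolve_model_class(model_family: str) -> type:
    # The first registered class whose match() accepts the family wins.
    for cls in MODEL_CLASSES:
        if cls.match(model_family):
            return cls
    raise ValueError(f"no backend class for {model_family!r}")

_install()
assert resolve_model_class("Ovis2") is Ovis2ChatModel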

xinference/model/llm/llm_family.json  (+114)

@@ -11181,6 +11181,120 @@
             ]
         }
     },
+    {
+        "version":1,
+        "context_length":32768,
+        "model_name":"Ovis2",
+        "model_lang":[
+            "en",
+            "zh"
+        ],
+        "model_ability":[
+            "chat",
+            "vision"
+        ],
+        "model_description":"Ovis (Open VISion) is a novel Multimodal Large Language Model (MLLM) architecture, designed to structurally align visual and textual embeddings.",
+        "model_specs":[
+            {
+                "model_format":"pytorch",
+                "model_size_in_billions":1,
+                "quantizations":[
+                    "none"
+                ],
+                "model_id":"AIDC-AI/Ovis2-1B"
+            },
+            {
+                "model_format":"pytorch",
+                "model_size_in_billions":2,
+                "quantizations":[
+                    "none"
+                ],
+                "model_id":"AIDC-AI/Ovis2-2B"
+            },
+            {
+                "model_format":"pytorch",
+                "model_size_in_billions":4,
+                "quantizations":[
+                    "none"
+                ],
+                "model_id":"AIDC-AI/Ovis2-4B"
+            },
+            {
+                "model_format":"pytorch",
+                "model_size_in_billions":8,
+                "quantizations":[
+                    "none"
+                ],
+                "model_id":"AIDC-AI/Ovis2-8B"
+            },
+            {
+                "model_format":"pytorch",
+                "model_size_in_billions":16,
+                "quantizations":[
+                    "none"
+                ],
+                "model_id":"AIDC-AI/Ovis2-16B"
+            },
+            {
+                "model_format":"pytorch",
+                "model_size_in_billions":34,
+                "quantizations":[
+                    "none"
+                ],
+                "model_id":"AIDC-AI/Ovis2-34B"
+            },
+            {
+                "model_format":"gptq",
+                "model_size_in_billions":2,
+                "quantizations":[
+                    "Int4"
+                ],
+                "model_id":"AIDC-AI/Ovis2-2B-GPTQ-{quantization}"
+            },
+            {
+                "model_format":"gptq",
+                "model_size_in_billions":4,
+                "quantizations":[
+                    "Int4"
+                ],
+                "model_id":"AIDC-AI/Ovis2-4B-GPTQ-{quantization}"
+            },
+            {
+                "model_format":"gptq",
+                "model_size_in_billions":8,
+                "quantizations":[
+                    "Int4"
+                ],
+                "model_id":"AIDC-AI/Ovis2-8B-GPTQ-{quantization}"
+            },
+            {
+                "model_format":"gptq",
+                "model_size_in_billions":16,
+                "quantizations":[
+                    "Int4"
+                ],
+                "model_id":"AIDC-AI/Ovis2-16B-GPTQ-{quantization}"
+            },
+            {
+                "model_format":"gptq",
+                "model_size_in_billions":34,
+                "quantizations":[
+                    "Int4",
+                    "Int8"
+                ],
+                "model_id":"AIDC-AI/Ovis2-34B-GPTQ-{quantization}"
+            }
+        ],
+        "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- messages[0]['content'] }}\n    {%- else %}\n        {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n    {%- endif %}\n    {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n    {%- else %}\n        {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {{- '<|im_start|>' + message.role }}\n        {%- if message.content %}\n            {{- '\\n' + message.content }}\n        {%- endif %}\n        {%- for tool_call in message.tool_calls %}\n            {%- if tool_call.function is defined %}\n                {%- set tool_call = tool_call.function %}\n            {%- endif %}\n            {{- '\\n<tool_call>\\n{\"name\": \"' }}\n            {{- tool_call.name }}\n            {{- '\", \"arguments\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- '}\\n</tool_call>' }}\n        {%- endfor %}\n        {{- '<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- message.content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n            {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+        "stop_token_ids": [
+            151645,
+            151643
+        ],
+        "stop": [
+            "<|im_end|>",
+            "<|endoftext|>"
+        ]
+    },
     {
         "version": 1,
         "context_length": 32768,

xinference/model/llm/llm_family_modelscope.json  (+125)

@@ -8968,6 +8968,131 @@
             ]
         }
     },
+    {
+        "version":1,
+        "context_length":32768,
+        "model_name":"Ovis2",
+        "model_lang":[
+            "en",
+            "zh"
+        ],
+        "model_ability":[
+            "chat",
+            "vision"
+        ],
+        "model_description":"Ovis (Open VISion) is a novel Multimodal Large Language Model (MLLM) architecture, designed to structurally align visual and textual embeddings.",
+        "model_specs":[
+            {
+                "model_format":"pytorch",
+                "model_size_in_billions":1,
+                "quantizations":[
+                    "none"
+                ],
+                "model_id":"AIDC-AI/Ovis2-1B",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format":"pytorch",
+                "model_size_in_billions":2,
+                "quantizations":[
+                    "none"
+                ],
+                "model_id":"AIDC-AI/Ovis2-2B",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format":"pytorch",
+                "model_size_in_billions":4,
+                "quantizations":[
+                    "none"
+                ],
+                "model_id":"AIDC-AI/Ovis2-4B",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format":"pytorch",
+                "model_size_in_billions":8,
+                "quantizations":[
+                    "none"
+                ],
+                "model_id":"AIDC-AI/Ovis2-8B",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format":"pytorch",
+                "model_size_in_billions":16,
+                "quantizations":[
+                    "none"
+                ],
+                "model_id":"AIDC-AI/Ovis2-16B",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format":"pytorch",
+                "model_size_in_billions":34,
+                "quantizations":[
+                    "none"
+                ],
+                "model_id":"AIDC-AI/Ovis2-34B",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format":"gptq",
+                "model_size_in_billions":2,
+                "quantizations":[
+                    "Int4"
+                ],
+                "model_id":"AIDC-AI/Ovis2-2B-GPTQ-{quantization}",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format":"gptq",
+                "model_size_in_billions":4,
+                "quantizations":[
+                    "Int4"
+                ],
+                "model_id":"AIDC-AI/Ovis2-4B-GPTQ-{quantization}",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format":"gptq",
+                "model_size_in_billions":8,
+                "quantizations":[
+                    "Int4"
+                ],
+                "model_id":"AIDC-AI/Ovis2-8B-GPTQ-{quantization}",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format":"gptq",
+                "model_size_in_billions":16,
+                "quantizations":[
+                    "Int4"
+                ],
+                "model_id":"AIDC-AI/Ovis2-16B-GPTQ-{quantization}",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format":"gptq",
+                "model_size_in_billions":34,
+                "quantizations":[
+                    "Int4",
+                    "Int8"
+                ],
+                "model_id":"AIDC-AI/Ovis2-34B-GPTQ-{quantization}",
+                "model_hub": "modelscope"
+            }
+        ],
+        "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- messages[0]['content'] }}\n    {%- else %}\n        {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n    {%- endif %}\n    {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n    {%- else %}\n        {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {{- '<|im_start|>' + message.role }}\n        {%- if message.content %}\n            {{- '\\n' + message.content }}\n        {%- endif %}\n        {%- for tool_call in message.tool_calls %}\n            {%- if tool_call.function is defined %}\n                {%- set tool_call = tool_call.function %}\n            {%- endif %}\n            {{- '\\n<tool_call>\\n{\"name\": \"' }}\n            {{- tool_call.name }}\n            {{- '\", \"arguments\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- '}\\n</tool_call>' }}\n        {%- endfor %}\n        {{- '<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- message.content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n            {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+        "stop_token_ids": [
+            151645,
+            151643
+        ],
+        "stop": [
+            "<|im_end|>",
+            "<|endoftext|>"
+        ]
+    },
     {
         "version": 1,
         "context_length": 32768,

xinference/model/llm/transformers/core.py  (+1)

@@ -75,6 +75,7 @@
     "cogagent",
     "gemma-3-1b-it",
     "gemma-3-it",
+    "Ovis2",
     "deepseek-vl2",
 ]
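This list appears to be the registry of families excluded from the default transformers code path, so requests for "Ovis2" route to the dedicated Ovis2ChatModel registered in __init__.py rather than the generic implementation.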
