
Commit 82aac97

zqfzqf and binary-husky authored

Alibaba Cloud Bailian (formerly Lingji / DashScope): add support for the deepseek-r1 and deepseek-v3 models (#2182)

* Alibaba Cloud Bailian (formerly Lingji): add support for the deepseek-r1 and deepseek-v3 models
* Update reasoning display

Co-authored-by: binary-husky <[email protected]>

1 parent 045cdb1 · commit 82aac97

File tree (4 files changed: +51 -11 lines)

config.py
request_llms/bridge_all.py
request_llms/bridge_chatgpt.py
request_llms/com_qwenapi.py

config.py

Lines changed: 1 addition & 0 deletions

```diff
@@ -45,6 +45,7 @@
     "gemini-1.5-pro", "chatglm3", "chatglm4",
     "deepseek-chat", "deepseek-coder", "deepseek-reasoner",
     "volcengine-deepseek-r1-250120", "volcengine-deepseek-v3-241226",
+    "dashscope-deepseek-r1", "dashscope-deepseek-v3",
 ]
 
 EMBEDDING_MODEL = "text-embedding-3-small"
```
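With this commit applied, the new models are opted into like any other entry. A minimal sketch of the relevant user configuration, assuming this repository's usual config.py conventions (the key value is a placeholder, and LLM_MODEL is shown only as an illustrative default):

```python
# config.py -- illustrative user configuration, values are placeholders
AVAIL_LLM_MODELS = ["qwen-max", "dashscope-deepseek-r1", "dashscope-deepseek-v3"]
LLM_MODEL = "dashscope-deepseek-r1"  # illustrative default selection

# Read by request_llms/com_qwenapi.py via get_conf("DASHSCOPE_API_KEY")
DASHSCOPE_API_KEY = "sk-xxxxxxxx"    # placeholder, not a real key
```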

request_llms/bridge_all.py

Lines changed: 23 additions & 2 deletions

```diff
@@ -813,8 +813,9 @@ def decode(self, *args, **kwargs):
         })
     except:
         logger.error(trimmed_format_exc())
-# -=-=-=-=-=-=- Tongyi online models -=-=-=-=-=-=-
-qwen_models = ["qwen-max-latest", "qwen-max-2025-01-25","qwen-max","qwen-turbo","qwen-plus"]
+
+# -=-=-=-=-=-=- Alibaba Cloud Bailian (Tongyi) online models -=-=-=-=-=-=-
+qwen_models = ["qwen-max-latest", "qwen-max-2025-01-25","qwen-max","qwen-turbo","qwen-plus","dashscope-deepseek-r1","dashscope-deepseek-v3"]
 if any(item in qwen_models for item in AVAIL_LLM_MODELS):
     try:
         from .bridge_qwen import predict_no_ui_long_connection as qwen_noui
@@ -864,10 +865,30 @@ def decode(self, *args, **kwargs):
                 "max_token": 30720,
                 "tokenizer": tokenizer_gpt35,
                 "token_cnt": get_token_num_gpt35,
+            },
+            "dashscope-deepseek-r1": {
+                "fn_with_ui": qwen_ui,
+                "fn_without_ui": qwen_noui,
+                "enable_reasoning": True,
+                "can_multi_thread": True,
+                "endpoint": None,
+                "max_token": 57344,
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
+            },
+            "dashscope-deepseek-v3": {
+                "fn_with_ui": qwen_ui,
+                "fn_without_ui": qwen_noui,
+                "can_multi_thread": True,
+                "endpoint": None,
+                "max_token": 57344,
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
             }
         })
     except:
         logger.error(trimmed_format_exc())
+
 # -=-=-=-=-=-=- 01.AI (Yi) models -=-=-=-=-=-=-
 yi_models = ["yi-34b-chat-0205","yi-34b-chat-200k","yi-large","yi-medium","yi-spark","yi-large-turbo","yi-large-preview"]
 if any(item in yi_models for item in AVAIL_LLM_MODELS):
```
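For orientation, bridge_all.py keeps a model_info registry keyed by model name, and the fields added above are what the rest of the application consults when dispatching a request. A hedged, self-contained sketch of that lookup pattern (the stand-in registry and placeholder functions below are illustrative, not a verbatim excerpt from the file):

```python
# Minimal stand-in registry mirroring the structure added in bridge_all.py
def qwen_noui(*args, **kwargs): ...  # placeholder for bridge_qwen's function
def qwen_ui(*args, **kwargs): ...

model_info = {
    "dashscope-deepseek-r1": {
        "fn_with_ui": qwen_ui, "fn_without_ui": qwen_noui,
        "enable_reasoning": True, "can_multi_thread": True,
        "endpoint": None, "max_token": 57344,
    },
}

info = model_info["dashscope-deepseek-r1"]
predict_no_ui = info["fn_without_ui"]
# Only the deepseek-r1 entry sets enable_reasoning, so default-safe access:
show_reasoning = info.get("enable_reasoning", False)
print(show_reasoning, info["max_token"])  # True 57344
```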

request_llms/bridge_chatgpt.py

Lines changed: 4 additions & 4 deletions

```diff
@@ -368,12 +368,12 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
                     log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer)
                     break # spec-compliant interfaces can break here
                 else:
-                    continue # non-compliant garbage interfaces have to keep going
+                    continue # non-compliant interfaces have to keep going
             # at this point we can assume the response must contain 'choices'
             try:
                 status_text = f"finish_reason: {chunkjson['choices'][0].get('finish_reason', 'null')}"
             except:
-                logger.error(f"Some garbage third-party interfaces produce this error; tolerate it: {chunk_decoded}")
+                logger.error(f"Some third-party interfaces produce this error; tolerate it: {chunk_decoded}")
             # process the main body of the data stream
             if has_content:
                 # normal case
@@ -382,9 +382,9 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
                 # some third-party interfaces produce this error; tolerate it
                 continue
             else:
-                # beyond what a well-behaved interface should reach; some garbage third-party interfaces end up here
+                # beyond what a well-behaved interface should reach; some third-party interfaces end up here
                 if chunkjson['choices'][0]["delta"].get("content", None) is None:
-                    logger.error(f"Some garbage third-party interfaces produce this error; tolerate it: {chunk_decoded}")
+                    logger.error(f"Some third-party interfaces produce this error; tolerate it: {chunk_decoded}")
                     continue
                 gpt_replying_buffer = gpt_replying_buffer + chunkjson['choices'][0]["delta"]["content"]
 
```
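The lines touched here (the change itself only softens the comment and log wording) belong to a deliberately tolerant streaming parser: it keeps consuming chunks even when a third-party OpenAI-compatible endpoint omits 'choices' or delta content. A self-contained sketch of that defensive pattern, with hypothetical chunk data rather than the repository's exact code:

```python
import json

def accumulate_stream(raw_chunks):
    """Tolerantly accumulate delta content from OpenAI-style stream chunks."""
    buffer = ""
    for raw in raw_chunks:
        try:
            chunk = json.loads(raw)
        except json.JSONDecodeError:
            continue  # non-compliant endpoints may interleave non-JSON noise
        choices = chunk.get("choices") or []
        if not choices:
            continue  # some endpoints emit chunks without 'choices'
        content = choices[0].get("delta", {}).get("content")
        if content is None:
            continue  # e.g. role-only or finish_reason-only chunks
        buffer += content
    return buffer

# Hypothetical stream: one good chunk, one noise line, one empty delta
print(accumulate_stream([
    '{"choices":[{"delta":{"content":"Hello"}}]}',
    'keep-alive',
    '{"choices":[{"delta":{}}]}',
]))  # -> "Hello"
```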

request_llms/com_qwenapi.py

Lines changed: 23 additions & 5 deletions

```diff
@@ -3,6 +3,7 @@
 import threading
 
 timeout_bot_msg = '[Local Message] Request timeout. Network error.'
+model_prefix_to_remove = 'dashscope-'
 
 class QwenRequestInstance():
     def __init__(self):
@@ -20,6 +21,13 @@ def validate_key():
             raise RuntimeError('Please configure DASHSCOPE_API_KEY')
         dashscope.api_key = get_conf("DASHSCOPE_API_KEY")
 
+    def format_reasoning(self, reasoning_content:str, main_content:str):
+        if reasoning_content:
+            reasoning_content_paragraphs = ''.join([f'<p style="margin: 1.25em 0;">{line}</p>' for line in reasoning_content.split('\n')])
+            formatted_reasoning_content = f'<div class="reasoning_process">{reasoning_content_paragraphs}</div>\n\n---\n\n'
+            return formatted_reasoning_content + main_content
+        else:
+            return main_content
 
     def generate(self, inputs, llm_kwargs, history, system_prompt):
         # import _thread as thread
```
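The new format_reasoning helper wraps any accumulated chain-of-thought in a styled reasoning_process block and separates it from the answer with a horizontal rule, falling back to the bare answer when there is no reasoning (as with deepseek-v3). A standalone reproduction to show its output; the sample strings are made up:

```python
def format_reasoning(reasoning_content: str, main_content: str) -> str:
    # Standalone copy of the method added above, for demonstration only
    if reasoning_content:
        paragraphs = ''.join(f'<p style="margin: 1.25em 0;">{line}</p>'
                             for line in reasoning_content.split('\n'))
        return f'<div class="reasoning_process">{paragraphs}</div>\n\n---\n\n' + main_content
    return main_content

print(format_reasoning("step 1\nstep 2", "42"))
# <div class="reasoning_process"><p style="margin: 1.25em 0;">step 1</p><p ...>step 2</p></div>
#
# ---
#
# 42
print(format_reasoning("", "42"))  # -> just "42" (no reasoning stream)
```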
```diff
@@ -28,9 +36,13 @@ def generate(self, inputs, llm_kwargs, history, system_prompt):
         if top_p == 0: top_p += 1e-5
         if top_p == 1: top_p -= 1e-5
 
+        model_name = llm_kwargs['llm_model']
+        if model_name.startswith(model_prefix_to_remove): model_name = model_name[len(model_prefix_to_remove):]
+
+        self.reasoning_buf = ""
         self.result_buf = ""
         responses = Generation.call(
-            model=llm_kwargs['llm_model'],
+            model=model_name,
             messages=generate_message_payload(inputs, llm_kwargs, history, system_prompt),
             top_p=top_p,
             temperature=llm_kwargs.get('temperature', 1.0),
```
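The dashscope- prefix exists only so this project can route these models to the Qwen bridge; the upstream dashscope SDK expects the bare model name, hence the strip before Generation.call. For example:

```python
model_prefix_to_remove = 'dashscope-'

for name in ['dashscope-deepseek-r1', 'qwen-max']:
    if name.startswith(model_prefix_to_remove):
        name = name[len(model_prefix_to_remove):]
    print(name)  # -> 'deepseek-r1', then 'qwen-max' (unprefixed names pass through)
```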
```diff
@@ -46,18 +58,24 @@
                     self.result_buf += response.output.choices[0].message.content
                 except:
                     pass
-                yield self.result_buf
+                yield self.format_reasoning(self.reasoning_buf, self.result_buf)
                 break
             elif response.output.choices[0].finish_reason == 'length':
                 self.result_buf += "[Local Message] The generated text is too long; the remaining output has been truncated"
-                yield self.result_buf
+                yield self.format_reasoning(self.reasoning_buf, self.result_buf)
                 break
             else:
+                try:
+                    contain_reasoning = hasattr(response.output.choices[0].message, 'reasoning_content')
+                except:
+                    contain_reasoning = False
+                if contain_reasoning:
+                    self.reasoning_buf += response.output.choices[0].message.reasoning_content
                 self.result_buf += response.output.choices[0].message.content
-                yield self.result_buf
+                yield self.format_reasoning(self.reasoning_buf, self.result_buf)
         else:
             self.result_buf += f"[Local Message] Request error. Status code: {response.status_code}, error code: {response.code}, message: {response.message}"
-            yield self.result_buf
+            yield self.format_reasoning(self.reasoning_buf, self.result_buf)
             break
 
        # exhaust the generator to avoid errors
```
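With these changes, every yield re-renders the full reasoning-plus-answer view, and reasoning_content is collected only when the chunk actually carries it (deepseek-r1 does, deepseek-v3 does not). A self-contained sketch of that accumulation, with mocked chunks standing in for dashscope streaming responses; the mock shape is an assumption:

```python
from types import SimpleNamespace

def mock_message(content, reasoning=None):
    # Hypothetical stand-in for response.output.choices[0].message
    msg = SimpleNamespace(content=content)
    if reasoning is not None:
        msg.reasoning_content = reasoning  # only r1-style chunks carry this
    return msg

reasoning_buf, result_buf = "", ""
for msg in [mock_message("", reasoning="Let me think..."),
            mock_message("The answer"),
            mock_message(" is 42.")]:
    if hasattr(msg, 'reasoning_content'):
        reasoning_buf += msg.reasoning_content
    result_buf += msg.content

print(reasoning_buf)  # "Let me think..."
print(result_buf)     # "The answer is 42."
```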

0 commit comments