diff --git a/api/core/model_runtime/model_providers/xinference/llm/llm.py b/api/core/model_runtime/model_providers/xinference/llm/llm.py index b82f0430c5f782..8d86d6937d8ac9 100644 --- a/api/core/model_runtime/model_providers/xinference/llm/llm.py +++ b/api/core/model_runtime/model_providers/xinference/llm/llm.py @@ -63,6 +63,9 @@ ) from core.model_runtime.utils import helper +DEFAULT_MAX_RETRIES = 3 +DEFAULT_INVOKE_TIMEOUT = 60 + class XinferenceAILargeLanguageModel(LargeLanguageModel): def _invoke( @@ -315,7 +318,12 @@ def _convert_prompt_message_to_dict(self, message: PromptMessage) -> dict: message_dict = {"role": "system", "content": message.content} elif isinstance(message, ToolPromptMessage): message = cast(ToolPromptMessage, message) - message_dict = {"tool_call_id": message.tool_call_id, "role": "tool", "content": message.content} + message_dict = { + "tool_call_id": message.tool_call_id, + "role": "tool", + "content": message.content, + "name": message.name, + } else: raise ValueError(f"Unknown message type {type(message)}") @@ -466,8 +474,8 @@ def _generate( client = OpenAI( base_url=f'{credentials["server_url"]}/v1', api_key=api_key, - max_retries=3, - timeout=60, + max_retries=int(credentials.get("max_retries") or DEFAULT_MAX_RETRIES), + timeout=int(credentials.get("invoke_timeout") or DEFAULT_INVOKE_TIMEOUT), ) xinference_client = Client( diff --git a/api/core/model_runtime/model_providers/xinference/xinference.yaml b/api/core/model_runtime/model_providers/xinference/xinference.yaml index be9073c1cab1f4..3500136693fb48 100644 --- a/api/core/model_runtime/model_providers/xinference/xinference.yaml +++ b/api/core/model_runtime/model_providers/xinference/xinference.yaml @@ -56,3 +56,23 @@ model_credential_schema: placeholder: zh_Hans: 在此输入您的API密钥 en_US: Enter the api key + - variable: invoke_timeout + label: + zh_Hans: 调用超时时间 (单位:秒) + en_US: invoke timeout (unit:second) + type: text-input + required: true + default: '60' + placeholder: + zh_Hans: 在此输入调用超时时间 + en_US: Enter invoke timeout value + - variable: max_retries + label: + zh_Hans: 调用重试次数 + en_US: max retries + type: text-input + required: true + default: '3' + placeholder: + zh_Hans: 在此输入调用重试次数 + en_US: Enter max retries