Commit aa7c37f

Merge pull request #152 from runpod-workers/fix0.7.0-1
update oai serving classes
2 parents: a948e90 + dc8c880

File tree

1 file changed: +10 -4 lines changed

src/engine.py

+10 -4

@@ -4,10 +4,11 @@
 import asyncio
 
 from dotenv import load_dotenv
-from typing import AsyncGenerator
+from typing import AsyncGenerator, Optional
 import time
 
 from vllm import AsyncLLMEngine
+from vllm.entrypoints.logger import RequestLogger
 from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
 from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
 from vllm.entrypoints.openai.protocol import ChatCompletionRequest, CompletionRequest, ErrorResponse
@@ -151,17 +152,22 @@ async def _initialize_engines(self):
             model_config=self.model_config,
             models=self.serving_models,
             response_role=self.response_role,
+            request_logger=None,
             chat_template=self.tokenizer.tokenizer.chat_template,
+            chat_template_content_format="auto",
+            enable_reasoning=os.getenv('ENABLE_REASONING', 'false').lower() == 'true',
+            reasoning_parser=None,
+            return_token_as_token_ids=False,
             enable_auto_tools=os.getenv('ENABLE_AUTO_TOOL_CHOICE', 'false').lower() == 'true',
             tool_parser=os.getenv('TOOL_CALL_PARSER', "") or None,
-            lora_modules=lora_modules,
-            chat_template_content_format="auto",
+            enable_prompt_tokens_details=False
         )
         self.completion_engine = OpenAIServingCompletion(
             engine_client=self.llm,
             model_config=self.model_config,
             models=self.serving_models,
-            lora_modules=lora_modules,
+            request_logger=None,
+            return_token_as_token_ids=False,
         )
 
     async def generate(self, openai_request: JobInput):
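For context, the environment-variable handling visible in the new keyword arguments follows one small normalization pattern: a flag such as ENABLE_REASONING or ENABLE_AUTO_TOOL_CHOICE is treated as enabled only when the variable equals the string "true" (case-insensitive), and TOOL_CALL_PARSER falls back to None when unset or empty. The sketch below isolates that pattern; the helper names env_flag and env_or_none are hypothetical and not part of the worker.

import os
from typing import Optional

# Hypothetical helpers (not in the worker) that mirror the inline expressions
# used when constructing OpenAIServingChat and OpenAIServingCompletion above.

def env_flag(name: str, default: str = 'false') -> bool:
    # Only the literal string "true" (case-insensitive) enables the feature,
    # matching os.getenv(name, 'false').lower() == 'true' in the diff.
    return os.getenv(name, default).lower() == 'true'

def env_or_none(name: str) -> Optional[str]:
    # Unset or empty variables become None, matching os.getenv(name, "") or None.
    return os.getenv(name, "") or None

if __name__ == "__main__":
    print(env_flag('ENABLE_AUTO_TOOL_CHOICE'))  # False unless the env var is "true"
    print(env_or_none('TOOL_CALL_PARSER'))      # None unless a parser name is set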
