Commit 9f12227

Revert "Simplify max_output_tokens handling in LLM classes (#9296)"
This reverts commit 1e33624.
1 parent 1fd0aef commit 9f12227

2 files changed: +23, -4 lines changed


openhands/llm/llm.py

Lines changed: 20 additions & 1 deletion
@@ -163,7 +163,6 @@ def __init__(
             'temperature': self.config.temperature,
             'max_completion_tokens': self.config.max_output_tokens,
         }
-
         if self.config.top_k is not None:
             # openai doesn't expose top_k
             # litellm will handle it a bit differently than the openai-compatible params
@@ -493,6 +492,26 @@ def init_model_info(self) -> None:
                 # Safe fallback for any potentially viable model
                 self.config.max_input_tokens = 4096
 
+        if self.config.max_output_tokens is None:
+            # Safe default for any potentially viable model
+            self.config.max_output_tokens = 4096
+            if self.model_info is not None:
+                # max_output_tokens has precedence over max_tokens, if either exists.
+                # litellm has models with both, one or none of these 2 parameters!
+                if 'max_output_tokens' in self.model_info and isinstance(
+                    self.model_info['max_output_tokens'], int
+                ):
+                    self.config.max_output_tokens = self.model_info['max_output_tokens']
+                elif 'max_tokens' in self.model_info and isinstance(
+                    self.model_info['max_tokens'], int
+                ):
+                    self.config.max_output_tokens = self.model_info['max_tokens']
+            if any(
+                model in self.config.model
+                for model in ['claude-3-7-sonnet', 'claude-3.7-sonnet']
+            ):
+                self.config.max_output_tokens = 64000  # litellm set max to 128k, but that requires a header to be set
+
         # Initialize function calling capability
         # Check if model name is in our supported list
         model_name_supported = (
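
The restored logic picks a safe 4096-token default, lets litellm's model_info override it (preferring max_output_tokens over max_tokens), and finally caps the claude-3-7-sonnet family at 64k. A minimal standalone sketch of that precedence, for reference only — the helper name and signature are illustrative, not part of the codebase:

def resolve_max_output_tokens(model: str, model_info: dict | None) -> int:
    # Illustrative sketch of the restored fallback, not code from the repo.
    max_output_tokens = 4096  # safe default for any potentially viable model
    if model_info is not None:
        # max_output_tokens takes precedence over max_tokens when both exist
        if isinstance(model_info.get('max_output_tokens'), int):
            max_output_tokens = model_info['max_output_tokens']
        elif isinstance(model_info.get('max_tokens'), int):
            max_output_tokens = model_info['max_tokens']
    if any(m in model for m in ['claude-3-7-sonnet', 'claude-3.7-sonnet']):
        # litellm allows up to 128k here, but that requires an extra header
        max_output_tokens = 64000
    return max_output_tokens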

tests/unit/test_llm.py

Lines changed: 3 additions & 3 deletions
@@ -132,7 +132,7 @@ def test_llm_init_with_model_info(mock_get_model_info, default_config):
     llm = LLM(default_config)
     llm.init_model_info()
     assert llm.config.max_input_tokens == 8000
-    assert llm.config.max_output_tokens is None
+    assert llm.config.max_output_tokens == 2000
 
 
 @patch('openhands.llm.llm.litellm.get_model_info')
@@ -141,7 +141,7 @@ def test_llm_init_without_model_info(mock_get_model_info, default_config):
     llm = LLM(default_config)
     llm.init_model_info()
     assert llm.config.max_input_tokens == 4096
-    assert llm.config.max_output_tokens is None
+    assert llm.config.max_output_tokens == 4096
 
 
 def test_llm_init_with_custom_config():
@@ -260,7 +260,7 @@ def test_llm_init_with_openrouter_model(mock_get_model_info, default_config):
     llm = LLM(default_config)
     llm.init_model_info()
     assert llm.config.max_input_tokens == 7000
-    assert llm.config.max_output_tokens is None
+    assert llm.config.max_output_tokens == 1500
     mock_get_model_info.assert_called_once_with('openrouter:gpt-4o-mini')
 
 
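
Read against the sketch above, the new expected values line up with what the mocked model_info would plausibly report in each test. The payloads below are inferred from the assertions, not copied from the test fixtures, and reuse the resolve_max_output_tokens sketch shown earlier:

# Assumed payloads, inferred from the assertions above.
assert resolve_max_output_tokens('gpt-4o', {'max_input_tokens': 8000, 'max_output_tokens': 2000}) == 2000
assert resolve_max_output_tokens('gpt-4o', None) == 4096  # no model info: safe default
assert resolve_max_output_tokens('openrouter:gpt-4o-mini', {'max_input_tokens': 7000, 'max_tokens': 1500}) == 1500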
