@@ -163,7 +163,6 @@ def __init__(
             'temperature': self.config.temperature,
             'max_completion_tokens': self.config.max_output_tokens,
         }
-
         if self.config.top_k is not None:
             # openai doesn't expose top_k
             # litellm will handle it a bit differently than the openai-compatible params
@@ -493,6 +492,26 @@ def init_model_info(self) -> None:
             # Safe fallback for any potentially viable model
             self.config.max_input_tokens = 4096
 
+        if self.config.max_output_tokens is None:
+            # Safe default for any potentially viable model
+            self.config.max_output_tokens = 4096
+            if self.model_info is not None:
+                # max_output_tokens has precedence over max_tokens, if either exists.
+                # litellm has models with both, one or none of these 2 parameters!
+                if 'max_output_tokens' in self.model_info and isinstance(
+                    self.model_info['max_output_tokens'], int
+                ):
+                    self.config.max_output_tokens = self.model_info['max_output_tokens']
+                elif 'max_tokens' in self.model_info and isinstance(
+                    self.model_info['max_tokens'], int
+                ):
+                    self.config.max_output_tokens = self.model_info['max_tokens']
+            if any(
+                model in self.config.model
+                for model in ['claude-3-7-sonnet', 'claude-3.7-sonnet']
+            ):
+                self.config.max_output_tokens = 64000  # litellm set max to 128k, but that requires a header to be set
+
         # Initialize function calling capability
         # Check if model name is in our supported list
         model_name_supported = (
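For reference, the precedence applied in the added block (prefer model_info['max_output_tokens'], fall back to model_info['max_tokens'], otherwise keep a 4096 default, with Claude 3.7 Sonnet capped at 64k) can be sketched as a standalone helper. This is only an illustration, assuming litellm.get_model_info() is available for the model and may return either, both, or neither of those keys; the helper name resolve_max_output_tokens is hypothetical and not part of the diff.

    # Hypothetical helper mirroring the fallback logic added in init_model_info.
    import litellm


    def resolve_max_output_tokens(model: str, default: int = 4096) -> int:
        try:
            # litellm raises if the model isn't in its model map
            model_info = litellm.get_model_info(model)
        except Exception:
            model_info = None

        max_output_tokens = default
        if model_info is not None:
            # max_output_tokens takes precedence over max_tokens, if either exists
            if isinstance(model_info.get('max_output_tokens'), int):
                max_output_tokens = model_info['max_output_tokens']
            elif isinstance(model_info.get('max_tokens'), int):
                max_output_tokens = model_info['max_tokens']

        # Special case from the diff: cap Claude 3.7 Sonnet at 64k, since the
        # 128k limit litellm reports requires an extra header to be set.
        if any(m in model for m in ['claude-3-7-sonnet', 'claude-3.7-sonnet']):
            max_output_tokens = 64000
        return max_output_tokens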