@@ -59,6 +59,7 @@ def create(
59
59
presence_penalty : Optional [float ] | NotGiven = NOT_GIVEN ,
60
60
response_format : completion_create_params .ResponseFormat | NotGiven = NOT_GIVEN ,
61
61
seed : Optional [int ] | NotGiven = NOT_GIVEN ,
62
+ service_tier : Optional [Literal ["auto" , "default" ]] | NotGiven = NOT_GIVEN ,
62
63
stop : Union [Optional [str ], List [str ]] | NotGiven = NOT_GIVEN ,
63
64
stream : Optional [Literal [False ]] | NotGiven = NOT_GIVEN ,
64
65
stream_options : Optional [ChatCompletionStreamOptionsParam ] | NotGiven = NOT_GIVEN ,
@@ -163,6 +164,16 @@ def create(
163
164
should refer to the `system_fingerprint` response parameter to monitor changes
164
165
in the backend.
165
166
167
+ service_tier: Specifies the latency tier to use for processing the request. This parameter is
168
+ relevant for customers subscribed to the scale tier service:
169
+
170
+ - If set to 'auto', the system will utilize scale tier credits until they are
171
+ exhausted.
172
+ - If set to 'default', the request will be processed in the shared cluster.
173
+
174
+ When this parameter is set, the response body will include the `service_tier`
175
+ utilized.
176
+
166
177
stop: Up to 4 sequences where the API will stop generating further tokens.
167
178
168
179
stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be
@@ -236,6 +247,7 @@ def create(
236
247
presence_penalty : Optional [float ] | NotGiven = NOT_GIVEN ,
237
248
response_format : completion_create_params .ResponseFormat | NotGiven = NOT_GIVEN ,
238
249
seed : Optional [int ] | NotGiven = NOT_GIVEN ,
250
+ service_tier : Optional [Literal ["auto" , "default" ]] | NotGiven = NOT_GIVEN ,
239
251
stop : Union [Optional [str ], List [str ]] | NotGiven = NOT_GIVEN ,
240
252
stream_options : Optional [ChatCompletionStreamOptionsParam ] | NotGiven = NOT_GIVEN ,
241
253
temperature : Optional [float ] | NotGiven = NOT_GIVEN ,
@@ -346,6 +358,16 @@ def create(
346
358
should refer to the `system_fingerprint` response parameter to monitor changes
347
359
in the backend.
348
360
361
+ service_tier: Specifies the latency tier to use for processing the request. This parameter is
362
+ relevant for customers subscribed to the scale tier service:
363
+
364
+ - If set to 'auto', the system will utilize scale tier credits until they are
365
+ exhausted.
366
+ - If set to 'default', the request will be processed in the shared cluster.
367
+
368
+ When this parameter is set, the response body will include the `service_tier`
369
+ utilized.
370
+
349
371
stop: Up to 4 sequences where the API will stop generating further tokens.
350
372
351
373
stream_options: Options for streaming response. Only set this when you set `stream: true`.
@@ -412,6 +434,7 @@ def create(
412
434
presence_penalty : Optional [float ] | NotGiven = NOT_GIVEN ,
413
435
response_format : completion_create_params .ResponseFormat | NotGiven = NOT_GIVEN ,
414
436
seed : Optional [int ] | NotGiven = NOT_GIVEN ,
437
+ service_tier : Optional [Literal ["auto" , "default" ]] | NotGiven = NOT_GIVEN ,
415
438
stop : Union [Optional [str ], List [str ]] | NotGiven = NOT_GIVEN ,
416
439
stream_options : Optional [ChatCompletionStreamOptionsParam ] | NotGiven = NOT_GIVEN ,
417
440
temperature : Optional [float ] | NotGiven = NOT_GIVEN ,
@@ -522,6 +545,16 @@ def create(
522
545
should refer to the `system_fingerprint` response parameter to monitor changes
523
546
in the backend.
524
547
548
+ service_tier: Specifies the latency tier to use for processing the request. This parameter is
549
+ relevant for customers subscribed to the scale tier service:
550
+
551
+ - If set to 'auto', the system will utilize scale tier credits until they are
552
+ exhausted.
553
+ - If set to 'default', the request will be processed in the shared cluster.
554
+
555
+ When this parameter is set, the response body will include the `service_tier`
556
+ utilized.
557
+
525
558
stop: Up to 4 sequences where the API will stop generating further tokens.
526
559
527
560
stream_options: Options for streaming response. Only set this when you set `stream: true`.
@@ -587,6 +620,7 @@ def create(
587
620
presence_penalty : Optional [float ] | NotGiven = NOT_GIVEN ,
588
621
response_format : completion_create_params .ResponseFormat | NotGiven = NOT_GIVEN ,
589
622
seed : Optional [int ] | NotGiven = NOT_GIVEN ,
623
+ service_tier : Optional [Literal ["auto" , "default" ]] | NotGiven = NOT_GIVEN ,
590
624
stop : Union [Optional [str ], List [str ]] | NotGiven = NOT_GIVEN ,
591
625
stream : Optional [Literal [False ]] | Literal [True ] | NotGiven = NOT_GIVEN ,
592
626
stream_options : Optional [ChatCompletionStreamOptionsParam ] | NotGiven = NOT_GIVEN ,
@@ -620,6 +654,7 @@ def create(
620
654
"presence_penalty" : presence_penalty ,
621
655
"response_format" : response_format ,
622
656
"seed" : seed ,
657
+ "service_tier" : service_tier ,
623
658
"stop" : stop ,
624
659
"stream" : stream ,
625
660
"stream_options" : stream_options ,
@@ -667,6 +702,7 @@ async def create(
667
702
presence_penalty : Optional [float ] | NotGiven = NOT_GIVEN ,
668
703
response_format : completion_create_params .ResponseFormat | NotGiven = NOT_GIVEN ,
669
704
seed : Optional [int ] | NotGiven = NOT_GIVEN ,
705
+ service_tier : Optional [Literal ["auto" , "default" ]] | NotGiven = NOT_GIVEN ,
670
706
stop : Union [Optional [str ], List [str ]] | NotGiven = NOT_GIVEN ,
671
707
stream : Optional [Literal [False ]] | NotGiven = NOT_GIVEN ,
672
708
stream_options : Optional [ChatCompletionStreamOptionsParam ] | NotGiven = NOT_GIVEN ,
@@ -771,6 +807,16 @@ async def create(
771
807
should refer to the `system_fingerprint` response parameter to monitor changes
772
808
in the backend.
773
809
810
+ service_tier: Specifies the latency tier to use for processing the request. This parameter is
811
+ relevant for customers subscribed to the scale tier service:
812
+
813
+ - If set to 'auto', the system will utilize scale tier credits until they are
814
+ exhausted.
815
+ - If set to 'default', the request will be processed in the shared cluster.
816
+
817
+ When this parameter is set, the response body will include the `service_tier`
818
+ utilized.
819
+
774
820
stop: Up to 4 sequences where the API will stop generating further tokens.
775
821
776
822
stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be
@@ -844,6 +890,7 @@ async def create(
844
890
presence_penalty : Optional [float ] | NotGiven = NOT_GIVEN ,
845
891
response_format : completion_create_params .ResponseFormat | NotGiven = NOT_GIVEN ,
846
892
seed : Optional [int ] | NotGiven = NOT_GIVEN ,
893
+ service_tier : Optional [Literal ["auto" , "default" ]] | NotGiven = NOT_GIVEN ,
847
894
stop : Union [Optional [str ], List [str ]] | NotGiven = NOT_GIVEN ,
848
895
stream_options : Optional [ChatCompletionStreamOptionsParam ] | NotGiven = NOT_GIVEN ,
849
896
temperature : Optional [float ] | NotGiven = NOT_GIVEN ,
@@ -954,6 +1001,16 @@ async def create(
954
1001
should refer to the `system_fingerprint` response parameter to monitor changes
955
1002
in the backend.
956
1003
1004
+ service_tier: Specifies the latency tier to use for processing the request. This parameter is
1005
+ relevant for customers subscribed to the scale tier service:
1006
+
1007
+ - If set to 'auto', the system will utilize scale tier credits until they are
1008
+ exhausted.
1009
+ - If set to 'default', the request will be processed in the shared cluster.
1010
+
1011
+ When this parameter is set, the response body will include the `service_tier`
1012
+ utilized.
1013
+
957
1014
stop: Up to 4 sequences where the API will stop generating further tokens.
958
1015
959
1016
stream_options: Options for streaming response. Only set this when you set `stream: true`.
@@ -1020,6 +1077,7 @@ async def create(
1020
1077
presence_penalty : Optional [float ] | NotGiven = NOT_GIVEN ,
1021
1078
response_format : completion_create_params .ResponseFormat | NotGiven = NOT_GIVEN ,
1022
1079
seed : Optional [int ] | NotGiven = NOT_GIVEN ,
1080
+ service_tier : Optional [Literal ["auto" , "default" ]] | NotGiven = NOT_GIVEN ,
1023
1081
stop : Union [Optional [str ], List [str ]] | NotGiven = NOT_GIVEN ,
1024
1082
stream_options : Optional [ChatCompletionStreamOptionsParam ] | NotGiven = NOT_GIVEN ,
1025
1083
temperature : Optional [float ] | NotGiven = NOT_GIVEN ,
@@ -1130,6 +1188,16 @@ async def create(
1130
1188
should refer to the `system_fingerprint` response parameter to monitor changes
1131
1189
in the backend.
1132
1190
1191
+ service_tier: Specifies the latency tier to use for processing the request. This parameter is
1192
+ relevant for customers subscribed to the scale tier service:
1193
+
1194
+ - If set to 'auto', the system will utilize scale tier credits until they are
1195
+ exhausted.
1196
+ - If set to 'default', the request will be processed in the shared cluster.
1197
+
1198
+ When this parameter is set, the response body will include the `service_tier`
1199
+ utilized.
1200
+
1133
1201
stop: Up to 4 sequences where the API will stop generating further tokens.
1134
1202
1135
1203
stream_options: Options for streaming response. Only set this when you set `stream: true`.
@@ -1195,6 +1263,7 @@ async def create(
1195
1263
presence_penalty : Optional [float ] | NotGiven = NOT_GIVEN ,
1196
1264
response_format : completion_create_params .ResponseFormat | NotGiven = NOT_GIVEN ,
1197
1265
seed : Optional [int ] | NotGiven = NOT_GIVEN ,
1266
+ service_tier : Optional [Literal ["auto" , "default" ]] | NotGiven = NOT_GIVEN ,
1198
1267
stop : Union [Optional [str ], List [str ]] | NotGiven = NOT_GIVEN ,
1199
1268
stream : Optional [Literal [False ]] | Literal [True ] | NotGiven = NOT_GIVEN ,
1200
1269
stream_options : Optional [ChatCompletionStreamOptionsParam ] | NotGiven = NOT_GIVEN ,
@@ -1228,6 +1297,7 @@ async def create(
1228
1297
"presence_penalty" : presence_penalty ,
1229
1298
"response_format" : response_format ,
1230
1299
"seed" : seed ,
1300
+ "service_tier" : service_tier ,
1231
1301
"stop" : stop ,
1232
1302
"stream" : stream ,
1233
1303
"stream_options" : stream_options ,
0 commit comments