
Commit b4b4e66

stainless-app[bot] authored and stainless-bot committed
feat(api): add service tier argument for chat completions (#1486)
1 parent 811f4e7 commit b4b4e66

File tree

7 files changed: +109 −2 lines changed


.stats.yml

+1 −1

@@ -1,2 +1,2 @@
 configured_endpoints: 64
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-5cb1810135c35c5024698f3365626471a04796e26e393aefe1aa0ba3c0891919.yml
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-8fe357c6b5a425d810d731e4102a052d8e38c5e2d66950e6de1025415160bf88.yml

src/openai/_base_client.py

+7 −1

@@ -457,7 +457,7 @@ def _build_request(
             raise RuntimeError(f"Unexpected JSON data type, {type(json_data)}, cannot merge with `extra_body`")

         headers = self._build_headers(options)
-        params = _merge_mappings(self._custom_query, options.params)
+        params = _merge_mappings(self.default_query, options.params)
         content_type = headers.get("Content-Type")

         # If the given Content-Type header is multipart/form-data then it
@@ -593,6 +593,12 @@ def default_headers(self) -> dict[str, str | Omit]:
             **self._custom_headers,
         }

+    @property
+    def default_query(self) -> dict[str, object]:
+        return {
+            **self._custom_query,
+        }
+
     def _validate_headers(
         self,
         headers: Headers,  # noqa: ARG002
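
Note the shape of the fix: `_build_request` had been reading the private `_custom_query` attribute directly, and now goes through a new public `default_query` property that mirrors the existing `default_headers` hook. A minimal sketch of what this enables, assuming the usual client classes inherit this base; the `MyClient` subclass and the `partner_id` query param are hypothetical, not part of this commit:

from openai import OpenAI

class MyClient(OpenAI):
    @property
    def default_query(self) -> dict[str, object]:
        # Extend the base defaults. Per-request params are merged on top
        # of this mapping in _build_request, so they still win on conflict.
        return {**super().default_query, "partner_id": "acme"}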

src/openai/resources/chat/completions.py

+70 −0

@@ -59,6 +59,7 @@ def create(
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -163,6 +164,16 @@ def create(
               should refer to the `system_fingerprint` response parameter to monitor changes
               in the backend.

+          service_tier: Specifies the latency tier to use for processing the request. This parameter is
+              relevant for customers subscribed to the scale tier service:
+
+              - If set to 'auto', the system will utilize scale tier credits until they are
+                exhausted.
+              - If set to 'default', the request will be processed in the shared cluster.
+
+              When this parameter is set, the response body will include the `service_tier`
+              utilized.
+
           stop: Up to 4 sequences where the API will stop generating further tokens.

           stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be
@@ -236,6 +247,7 @@ def create(
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
@@ -346,6 +358,16 @@ def create(
               should refer to the `system_fingerprint` response parameter to monitor changes
               in the backend.

+          service_tier: Specifies the latency tier to use for processing the request. This parameter is
+              relevant for customers subscribed to the scale tier service:
+
+              - If set to 'auto', the system will utilize scale tier credits until they are
+                exhausted.
+              - If set to 'default', the request will be processed in the shared cluster.
+
+              When this parameter is set, the response body will include the `service_tier`
+              utilized.
+
           stop: Up to 4 sequences where the API will stop generating further tokens.

           stream_options: Options for streaming response. Only set this when you set `stream: true`.
@@ -412,6 +434,7 @@ def create(
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
@@ -522,6 +545,16 @@ def create(
               should refer to the `system_fingerprint` response parameter to monitor changes
               in the backend.

+          service_tier: Specifies the latency tier to use for processing the request. This parameter is
+              relevant for customers subscribed to the scale tier service:
+
+              - If set to 'auto', the system will utilize scale tier credits until they are
+                exhausted.
+              - If set to 'default', the request will be processed in the shared cluster.
+
+              When this parameter is set, the response body will include the `service_tier`
+              utilized.
+
           stop: Up to 4 sequences where the API will stop generating further tokens.

           stream_options: Options for streaming response. Only set this when you set `stream: true`.
@@ -587,6 +620,7 @@ def create(
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -620,6 +654,7 @@ def create(
                     "presence_penalty": presence_penalty,
                     "response_format": response_format,
                     "seed": seed,
+                    "service_tier": service_tier,
                     "stop": stop,
                     "stream": stream,
                     "stream_options": stream_options,
@@ -667,6 +702,7 @@ async def create(
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -771,6 +807,16 @@ async def create(
               should refer to the `system_fingerprint` response parameter to monitor changes
               in the backend.

+          service_tier: Specifies the latency tier to use for processing the request. This parameter is
+              relevant for customers subscribed to the scale tier service:
+
+              - If set to 'auto', the system will utilize scale tier credits until they are
+                exhausted.
+              - If set to 'default', the request will be processed in the shared cluster.
+
+              When this parameter is set, the response body will include the `service_tier`
+              utilized.
+
           stop: Up to 4 sequences where the API will stop generating further tokens.

           stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be
@@ -844,6 +890,7 @@ async def create(
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
@@ -954,6 +1001,16 @@ async def create(
               should refer to the `system_fingerprint` response parameter to monitor changes
               in the backend.

+          service_tier: Specifies the latency tier to use for processing the request. This parameter is
+              relevant for customers subscribed to the scale tier service:
+
+              - If set to 'auto', the system will utilize scale tier credits until they are
+                exhausted.
+              - If set to 'default', the request will be processed in the shared cluster.
+
+              When this parameter is set, the response body will include the `service_tier`
+              utilized.
+
           stop: Up to 4 sequences where the API will stop generating further tokens.

           stream_options: Options for streaming response. Only set this when you set `stream: true`.
@@ -1020,6 +1077,7 @@ async def create(
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
@@ -1130,6 +1188,16 @@ async def create(
               should refer to the `system_fingerprint` response parameter to monitor changes
               in the backend.

+          service_tier: Specifies the latency tier to use for processing the request. This parameter is
+              relevant for customers subscribed to the scale tier service:
+
+              - If set to 'auto', the system will utilize scale tier credits until they are
+                exhausted.
+              - If set to 'default', the request will be processed in the shared cluster.
+
+              When this parameter is set, the response body will include the `service_tier`
+              utilized.
+
           stop: Up to 4 sequences where the API will stop generating further tokens.

           stream_options: Options for streaming response. Only set this when you set `stream: true`.
@@ -1195,6 +1263,7 @@ async def create(
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -1228,6 +1297,7 @@ async def create(
                     "presence_penalty": presence_penalty,
                     "response_format": response_format,
                     "seed": seed,
+                    "service_tier": service_tier,
                     "stop": stop,
                     "stream": stream,
                     "stream_options": stream_options,

src/openai/types/chat/chat_completion.py

+7 −0

@@ -56,6 +56,13 @@ class ChatCompletion(BaseModel):
     object: Literal["chat.completion"]
     """The object type, which is always `chat.completion`."""

+    service_tier: Optional[Literal["scale", "default"]] = None
+    """The service tier used for processing the request.
+
+    This field is only included if the `service_tier` parameter is specified in the
+    request.
+    """
+
     system_fingerprint: Optional[str] = None
     """This fingerprint represents the backend configuration that the model runs with.
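
Note the asymmetry with the request parameter: requests ask for "auto" or "default", while the response reports the tier that actually processed them, "scale" or "default". A small sketch of reading the field; the helper function is hypothetical:

from openai.types.chat import ChatCompletion

def served_on_scale_tier(completion: ChatCompletion) -> bool:
    # service_tier is None unless the request set the service_tier param.
    return completion.service_tier == "scale"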

src/openai/types/chat/chat_completion_chunk.py

+7 −0

@@ -122,6 +122,13 @@ class ChatCompletionChunk(BaseModel):
     object: Literal["chat.completion.chunk"]
     """The object type, which is always `chat.completion.chunk`."""

+    service_tier: Optional[Literal["scale", "default"]] = None
+    """The service tier used for processing the request.
+
+    This field is only included if the `service_tier` parameter is specified in the
+    request.
+    """
+
     system_fingerprint: Optional[str] = None
     """
     This fingerprint represents the backend configuration that the model runs with.
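
Streaming callers get the same field on the chunk model. A sketch under the same assumptions as the non-streaming example above; which chunks carry a non-None value is not specified by this commit, so the code checks each one:

from openai import OpenAI

client = OpenAI()

stream = client.chat.completions.create(
    model="gpt-4o",  # illustrative
    messages=[{"role": "user", "content": "Stream a short reply."}],
    service_tier="auto",
    stream=True,
)

for chunk in stream:
    # service_tier is None when the request didn't set the parameter.
    if chunk.service_tier is not None:
        print("served by:", chunk.service_tier)  # "scale" or "default"
    for choice in chunk.choices:
        if choice.delta.content:
            print(choice.delta.content, end="")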

src/openai/types/chat/completion_create_params.py

+13 −0

@@ -146,6 +146,19 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     in the backend.
     """

+    service_tier: Optional[Literal["auto", "default"]]
+    """Specifies the latency tier to use for processing the request.
+
+    This parameter is relevant for customers subscribed to the scale tier service:
+
+    - If set to 'auto', the system will utilize scale tier credits until they are
+      exhausted.
+    - If set to 'default', the request will be processed in the shared cluster.
+
+    When this parameter is set, the response body will include the `service_tier`
+    utilized.
+    """
+
     stop: Union[Optional[str], List[str]]
     """Up to 4 sequences where the API will stop generating further tokens."""
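
Because the parameter is declared on the TypedDict, static type checkers reject values outside the literal set. A sketch of type-checked request construction, assuming the class shown above is importable from this module; the model name is illustrative:

from openai.types.chat.completion_create_params import CompletionCreateParamsBase

params: CompletionCreateParamsBase = {
    "messages": [{"role": "user", "content": "ping"}],
    "model": "gpt-4o",       # illustrative
    "service_tier": "auto",  # only "auto", "default", or None type-check here
}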

tests/api_resources/chat/test_completions.py

+4 −0

@@ -60,6 +60,7 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None:
             presence_penalty=-2,
             response_format={"type": "json_object"},
             seed=-9223372036854776000,
+            service_tier="auto",
             stop="string",
             stream=False,
             stream_options={"include_usage": True},
@@ -176,6 +177,7 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None:
             presence_penalty=-2,
             response_format={"type": "json_object"},
             seed=-9223372036854776000,
+            service_tier="auto",
             stop="string",
             stream_options={"include_usage": True},
             temperature=1,
@@ -294,6 +296,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
             presence_penalty=-2,
             response_format={"type": "json_object"},
             seed=-9223372036854776000,
+            service_tier="auto",
             stop="string",
             stream=False,
             stream_options={"include_usage": True},
@@ -410,6 +413,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
             presence_penalty=-2,
             response_format={"type": "json_object"},
             seed=-9223372036854776000,
+            service_tier="auto",
             stop="string",
             stream_options={"include_usage": True},
             temperature=1,
