Skip to content

Commit 35d41d9

Browse files
authored
Reduce CLIENT_TIMEOUT_SEC in benchmarking script (#932)
first commit
1 parent c985e95 commit 35d41d9

File tree

1 file changed

+12
-3
lines changed

1 file changed

+12
-3
lines changed

benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py

+12-3
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
from google.protobuf.timestamp_pb2 import Timestamp
2828

2929
MIN_SEQ_LEN = 4
30-
CLIENT_TIMEOUT_SEC = 3 * 60 * 60
3130
NEW_TEXT_KEY = "\nOutput:\n"
3231
PROMETHEUS_PORT = 9090
3332

@@ -148,6 +147,7 @@ async def send_stream_request(
148147
tokenizer: PreTrainedTokenizerBase,
149148
sax_model: str,
150149
model: str,
150+
timeout: float,
151151
) -> Tuple[Tuple[int, int, float], float, Dict[str, int]]:
152152
"""Sends stream request to server"""
153153
request_start_time = time.time()
@@ -179,7 +179,7 @@ async def send_stream_request(
179179
ttft = 0.0
180180
st = time.perf_counter()
181181
output = ""
182-
timeout = aiohttp.ClientTimeout(total=CLIENT_TIMEOUT_SEC)
182+
timeout = aiohttp.ClientTimeout(total=timeout)
183183
async with aiohttp.ClientSession(timeout=timeout,trust_env=True) as session:
184184
try:
185185
async with session.post(api_url, headers=headers, json=pload, ssl=False) as response:
@@ -249,6 +249,7 @@ async def send_request(
249249
tokenizer: PreTrainedTokenizerBase,
250250
sax_model: str,
251251
model: str,
252+
timeout: float,
252253
) -> Tuple[Tuple[int, int, float], float, Dict[str, int]]:
253254
"""Sends request to server."""
254255
request_start_time = time.time()
@@ -322,7 +323,7 @@ async def send_request(
322323
raise ValueError(f"Unknown backend: {backend}")
323324

324325
# Set the client timeout from the caller-supplied value (defaults to 3 hrs).
325-
timeout = aiohttp.ClientTimeout(total=CLIENT_TIMEOUT_SEC)
326+
timeout = aiohttp.ClientTimeout(total=timeout)
326327
async with aiohttp.ClientSession(timeout=timeout,trust_env=True,trace_configs=[trace_config]) as session:
327328
while True:
328329
try:
@@ -426,6 +427,7 @@ async def benchmark(
426427
tokenizer,
427428
args.sax_model,
428429
model,
430+
args.request_timeout,
429431
)
430432
)
431433
else:
@@ -442,6 +444,7 @@ async def benchmark(
442444
tokenizer,
443445
args.sax_model,
444446
model,
447+
args.request_timeout,
445448
)
446449
)
447450
tasks.append(task)
@@ -834,6 +837,12 @@ async def main(args: argparse.Namespace):
834837
action="store_true",
835838
help="Whether to stream the request. Needed for TTFT metric",
836839
)
840+
parser.add_argument(
841+
"--request-timeout",
842+
type=float,
843+
default=(3.0 * 60.0 * 60.0),
844+
help="Individual request timeout",
845+
)
837846
parser.add_argument(
838847
"--tokenizer",
839848
type=str,

0 commit comments

Comments
 (0)