Skip to content

Commit 35397cd

Browse files
authored
Fix divide by zero exception for responses of length 1 (#902)
first commit
1 parent b1befbe commit 35397cd

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,8 @@ async def send_stream_request(
228 228    request_latency = (prompt_len, output_len, (request_end_time - request_start_time))
229 229
230 230    # Exclude first token for tpot calculation
231     -  tpot_metric.observe((request_end_time - ttft - request_start_time) / (output_len - 1))
    231 +  if output_len > 1:
    232 +    tpot_metric.observe((request_end_time - ttft - request_start_time) / (output_len - 1))
232 233    request_latency_per_output_token_metric.observe((request_end_time - request_start_time) / output_len)
233 234    if ttft is not None:
234 235      ttft_metric.observe(ttft)

0 commit comments

Comments
 (0)