We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent b1befbe, commit 35397cd. Copy full SHA for 35397cd
benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py
@@ -228,7 +228,8 @@ async def send_stream_request(
228
request_latency = (prompt_len, output_len, (request_end_time - request_start_time))
229
230
# Exclude first token for tpot calculation
231
- tpot_metric.observe((request_end_time - ttft - request_start_time) / (output_len - 1))
+ if output_len > 1:
232
+ tpot_metric.observe((request_end_time - ttft - request_start_time) / (output_len - 1))
233
request_latency_per_output_token_metric.observe((request_end_time - request_start_time) / output_len)
234
if ttft is not None:
235
ttft_metric.observe(ttft)
0 commit comments