Skip to content

Commit 92285fc

Browse files
Fix throughput to be in output tokens per second (#839)
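Throughput had been changed from output tokens per second to requests per second (rps). We don't want rps to be the default, so this commit restores `throughput` to output tokens per second and reports the request rate separately as `throughput_rps`.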
1 parent 3d16439 commit 92285fc

File tree

1 file changed

+5
-2
lines changed

1 file changed

+5
-2
lines changed

benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py

+5-2
Original file line number | Diff line number | Diff line change
@@ -518,11 +518,14 @@ def main(args: argparse.Namespace):
518518
print(f"Total time: {benchmark_time:.2f} s")
519519
print(f"Requests/min: {60 * args.num_prompts / benchmark_time:.2f}")
520520
benchmark_result['benchmark_time'] = benchmark_time
521-
benchmark_result['throughput'] = (args.num_prompts / benchmark_time)
521+
benchmark_result['throughput_rps'] = (args.num_prompts / benchmark_time)
522522

523523
total_output_tokens = np.sum([output_len for _, output_len, _ in
524524
REQUEST_LATENCY])
525-
output_tokens_per_min = 60 * total_output_tokens / benchmark_time
525+
output_tokens_per_second = total_output_tokens / benchmark_time
526+
benchmark_result['throughput'] = output_tokens_per_second
527+
528+
output_tokens_per_min = 60 * output_tokens_per_second
526529
print(f"Output_tokens/min: {output_tokens_per_min:.2f}")
527530
benchmark_result['total_output_token'] = int(total_output_tokens)
528531
benchmark_result['output_tokens_per_min'] = output_tokens_per_min

0 commit comments

Comments
 (0)