
Commit c1633fa

Allow benchmark to write json output (#801)
* write json output in benchmark
* fix bugs
* fix
1 parent: 819dfe9

File tree

1 file changed: +66 -1 lines changed


benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py

+66-1
@@ -7,6 +7,7 @@
 import argparse
 import asyncio
+from datetime import datetime
 import json
 import random
 import time
@@ -266,6 +267,42 @@ async def benchmark(
   await asyncio.gather(*tasks)
 
 
+def save_json_results(args: argparse.Namespace, benchmark_result):
+  # dimensions values are strings
+  dimensions_json = {}
+  # metrics values are numerical
+  metrics_json = {}
+
+  # Setup
+  current_dt = datetime.now().strftime("%Y%m%d-%H%M%S")
+  dimensions_json["date"] = current_dt
+  dimensions_json["backend"] = args.backend
+  dimensions_json["model_id"] = args.model
+  dimensions_json["tokenizer_id"] = args.tokenizer
+  if args.additional_metadata_metrics_to_save is not None:
+    dimensions_json = {
+        **dimensions_json,
+        **json.loads(args.additional_metadata_metrics_to_save),
+    }
+  metrics_json["num_prompts"] = args.num_prompts
+
+  # Traffic
+  metrics_json["request_rate"] = args.request_rate
+  metrics_json = {**metrics_json, **benchmark_result}
+
+  final_json = {}
+  final_json["metrics"] = metrics_json
+  final_json["dimensions"] = dimensions_json
+
+  # Save to file
+  base_model_id = args.model.split("/")[-1]
+  file_name = (
+      f"{args.backend}-{args.request_rate}qps-{base_model_id}-{current_dt}.json"
+  )
+  with open(file_name, "w", encoding="utf-8") as outfile:
+    json.dump(final_json, outfile)
+
+
 def main(args: argparse.Namespace):
   print(args)
   random.seed(args.seed)
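
For orientation, here is a rough sketch (not part of the commit) of the JSON document this new helper writes: "metrics" carries numeric values, including everything accumulated in benchmark_result, while "dimensions" carries string metadata. All field values below are invented for illustration; the real values come from the parsed args and the benchmark run.

# Illustrative sketch only -- approximate shape of the file written by
# save_json_results, with made-up values.
import json

example_output = {
    "metrics": {
        "num_prompts": 100,        # args.num_prompts
        "request_rate": 5.0,       # args.request_rate
        "benchmark_time": 123.4,   # ...plus every key collected in benchmark_result
        "avg_latency": 2.1,
    },
    "dimensions": {
        "date": "20240101-120000",                   # datetime.now().strftime("%Y%m%d-%H%M%S")
        "backend": "vllm",                           # args.backend (invented value)
        "model_id": "meta-llama/Llama-2-7b-hf",      # args.model (invented value)
        "tokenizer_id": "meta-llama/Llama-2-7b-hf",  # args.tokenizer (invented value)
    },
}

# The output file name follows
#   f"{args.backend}-{args.request_rate}qps-{base_model_id}-{current_dt}.json",
# e.g. "vllm-5.0qps-Llama-2-7b-hf-20240101-120000.json" for the values above.
print(json.dumps(example_output, indent=2))
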
@@ -305,24 +342,32 @@ def main(args: argparse.Namespace):
           args.model,
       )
   )
+  benchmark_result = {}
   benchmark_end_time = time.time()
   benchmark_time = benchmark_end_time - benchmark_start_time
   print(f"Total time: {benchmark_time:.2f} s")
   print(f"Requests/min: {60 * args.num_prompts / benchmark_time:.2f}")
+  benchmark_result['benchmark_time'] = benchmark_time
 
   total_output_tokens = np.sum([output_len for _, output_len, _ in
                                 REQUEST_LATENCY])
   output_tokens_per_min = 60 * total_output_tokens / benchmark_time
   print(f"Output_tokens/min: {output_tokens_per_min:.2f}")
+  benchmark_result['total_output_token'] = int(total_output_tokens)
+  benchmark_result['output_tokens_per_min'] = output_tokens_per_min
 
   total_input_tokens = np.sum([prompt_len for prompt_len, _, _ in
                                REQUEST_LATENCY])
   input_tokens_per_min = 60 * total_input_tokens / benchmark_time
   print(f"Input_tokens/min: {input_tokens_per_min:.2f}")
+  benchmark_result['total_input_tokens'] = int(total_input_tokens)
+  benchmark_result['input_tokens_per_min'] = input_tokens_per_min
 
   total_tokens = total_input_tokens + total_output_tokens
   tokens_per_min = 60 * total_tokens / benchmark_time
   print(f"Tokens/min: {tokens_per_min:.2f}")
+  benchmark_result['total_tokens'] = int(total_tokens)
+  benchmark_result['tokens_per_min'] = tokens_per_min
 
   if args.machine_cost:
     print(
@@ -336,6 +381,7 @@ def main(args: argparse.Namespace):
       "Average seconds/request (includes waiting time on server):"
       f" {avg_latency:.2f}"
   )
+  benchmark_result['avg_latency'] = avg_latency
 
   avg_per_token_latency = np.mean([
       latency / (prompt_len + output_len)
@@ -345,6 +391,7 @@ def main(args: argparse.Namespace):
       "Average milliseconds/token (includes waiting time on server):"
       f" {1000 * avg_per_token_latency:.2f}"
   )
+  benchmark_result['avg_per_token_latency'] = avg_per_token_latency
 
   avg_per_output_token_latency = np.mean(
       [latency / output_len for _, output_len, latency in REQUEST_LATENCY]
@@ -353,6 +400,7 @@ def main(args: argparse.Namespace):
       "Average milliseconds/output_token (includes waiting time on server):"
       f" {1000 * avg_per_output_token_latency:.2f}"
   )
+  benchmark_result['avg_per_output_token_latency'] = avg_per_output_token_latency
 
   avg_input_len = np.mean(
       [prompt_len for prompt_len, _, _ in REQUEST_LATENCY]
@@ -361,6 +409,7 @@ def main(args: argparse.Namespace):
       "Average input length:"
       f" {avg_input_len:.2f}"
   )
+  benchmark_result['avg_input_len'] = avg_input_len
 
   avg_output_len = np.mean(
       [output_len for _, output_len, _ in REQUEST_LATENCY]
@@ -369,6 +418,10 @@ def main(args: argparse.Namespace):
       "Average output length:"
       f" {avg_output_len:.2f}"
   )
+  benchmark_result['avg_output_len'] = avg_output_len
+
+  if args.save_json_results:
+    save_json_results(args, benchmark_result)
 
 
 if __name__ == "__main__":
@@ -479,6 +532,18 @@ def main(args: argparse.Namespace):
           " and max_output_length."
       ),
   )
+  parser.add_argument(
+      "--save-json-results",
+      action="store_true",
+      help="Whether to save benchmark results to a json file.",
+  )
+  parser.add_argument(
+      "--additional-metadata-metrics-to-save",
+      type=str,
+      help=(
+          "Additional metadata about the workload. Should be a dictionary in"
+          " the form of a string."
+      ),
+  )
   cmd_args = parser.parse_args()
   main(cmd_args)
-
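
A usage note on the two new flags (a sketch, not from the commit): --save-json-results is a plain boolean switch, while the value of --additional-metadata-metrics-to-save is passed straight to json.loads and merged into the "dimensions" block, so it must be a JSON object serialized as a string. The snippet below, with invented metadata keys, shows one way to build and check such a string:

# Sketch with invented example values; the script itself only calls json.loads on the flag.
import json

metadata = {"cluster": "test-cluster", "gpu_type": "nvidia-l4"}
flag_value = json.dumps(metadata)
# flag_value == '{"cluster": "test-cluster", "gpu_type": "nvidia-l4"}'
# and would be passed on the command line as the value of
# --additional-metadata-metrics-to-save.

# This mirrors the merge performed in save_json_results:
dimensions_json = {"backend": "vllm"}  # invented stand-in for the real dimensions
dimensions_json = {**dimensions_json, **json.loads(flag_value)}
assert dimensions_json["gpu_type"] == "nvidia-l4"
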
