Commit 6259cda: Accept extra request inputs in GenAi-Perf (#552)

Parent: 42e8b2a

7 files changed: +218, -19 lines

src/c++/perf_analyzer/genai-perf/README.md (+12)
```diff
@@ -131,6 +131,11 @@ options:
 * `--num-prompts`: The number of unique prompts to generate.
 * `--dataset`: HuggingFace dataset to use for benchmarking.
 
+You can optionally set additional model inputs with the following option:
+* `--extra-inputs {input_name}:{value}`: An additional input for use with the model with a singular value,
+  such as `stream:true` or `max_tokens:5`. This flag can be repeated to supply multiple extra inputs.
+
 # Metrics
 
 GenAI-Perf collects a diverse set of metrics that captures the performance of
@@ -241,6 +246,13 @@ both infer per second and latency.
 
 Enables the use of the streaming API.
 
+##### `--extra-inputs`
+
+Provides an additional input for use with the model with a singular value,
+such as `stream:true` or `max_tokens:5`. This flag can be repeated to supply multiple extra inputs.
+
 ##### `--endpoint {v1/completions,v1/chat/completions}`
 
 Describes what endpoint to send requests to on the server. This is required when
```
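The flag is registered with argparse's `append` action (see the parser.py change below), so repeating it collects every `name:value` pair into a list. A minimal sketch of that collection step, assuming a bare `ArgumentParser` rather than GenAI-Perf's full CLI wiring:

```python
import argparse

# Minimal sketch: a bare parser standing in for GenAI-Perf's CLI setup.
parser = argparse.ArgumentParser()
# action="append" mirrors the registration added in parser.py below.
parser.add_argument("--extra-inputs", action="append")

args = parser.parse_args(
    ["--extra-inputs", "stream:true", "--extra-inputs", "max_tokens:5"]
)
print(args.extra_inputs)  # ['stream:true', 'max_tokens:5']
```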

src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/llm_inputs.py (+45, -11)
```diff
@@ -86,6 +86,7 @@ def create_llm_inputs(
         add_model_name: bool = False,
         add_stream: bool = False,
         tokenizer: AutoTokenizer = DEFAULT_TOKENIZER,
+        extra_inputs: Dict = {},
     ) -> Dict:
         """
         Given an input type, input format, and output type. Output a string of LLM Inputs
@@ -109,9 +110,11 @@ def create_llm_inputs(
         length:
             Number of entries to gather
         add_model_name:
-            If true adds a model name field to each payload
+            If true, adds a model name field to each payload
         add_stream:
-            If true adds a steam field to each payload
+            If true, adds a stream field to each payload
+        extra_inputs:
+            If provided, append these inputs to every request
 
         Required Synthetic Prompt Generation Parameters
         -----------------------------------------------
@@ -164,7 +167,12 @@ def create_llm_inputs(
             )
 
         json_in_pa_format = LlmInputs._convert_generic_json_to_output_format(
-            output_format, generic_dataset_json, add_model_name, add_stream, model_name
+            output_format,
+            generic_dataset_json,
+            add_model_name,
+            add_stream,
+            model_name,
+            extra_inputs,
         )
         LlmInputs._write_json_to_file(json_in_pa_format)
 
@@ -309,24 +317,29 @@ def _convert_generic_json_to_output_format(
         add_model_name: bool,
         add_stream: bool,
         model_name: str = "",
+        extra_inputs: Dict = {},
     ) -> Dict:
         if output_format == OutputFormat.OPENAI_CHAT_COMPLETIONS:
             output_json = (
                 LlmInputs._convert_generic_json_to_openai_chat_completions_format(
-                    generic_dataset, add_model_name, add_stream, model_name
+                    generic_dataset,
+                    add_model_name,
+                    add_stream,
+                    model_name,
+                    extra_inputs,
                 )
             )
         elif output_format == OutputFormat.OPENAI_COMPLETIONS:
             output_json = LlmInputs._convert_generic_json_to_openai_completions_format(
-                generic_dataset, add_model_name, add_stream, model_name
+                generic_dataset, add_model_name, add_stream, model_name, extra_inputs
             )
         elif output_format == OutputFormat.VLLM:
             output_json = LlmInputs._convert_generic_json_to_vllm_format(
-                generic_dataset, add_model_name, add_stream, model_name
+                generic_dataset, add_model_name, add_stream, model_name, extra_inputs
             )
         elif output_format == OutputFormat.TRTLLM:
             output_json = LlmInputs._convert_generic_json_to_trtllm_format(
-                generic_dataset, add_model_name, add_stream, model_name
+                generic_dataset, add_model_name, add_stream, model_name, extra_inputs
             )
         else:
             raise GenAIPerfException(
@@ -342,6 +355,7 @@ def _convert_generic_json_to_openai_chat_completions_format(
         add_model_name: bool,
         add_stream: bool,
         model_name: str = "",
+        extra_inputs: Dict = {},
     ) -> Dict:
         # TODO (TMA-1757): Implement a way to select a role for `text_input`
         (
@@ -356,6 +370,7 @@ def _convert_generic_json_to_openai_chat_completions_format(
             add_model_name,
             add_stream,
             model_name,
+            extra_inputs,
         )
 
         return pa_json
@@ -367,6 +382,7 @@ def _convert_generic_json_to_openai_completions_format(
         add_model_name: bool,
         add_stream: bool,
         model_name: str = "",
+        extra_inputs: Dict = {},
     ) -> Dict:
         (
             system_role_headers,
@@ -381,6 +397,7 @@ def _convert_generic_json_to_openai_completions_format(
             add_model_name,
             add_stream,
             model_name,
+            extra_inputs,
         )
 
         return pa_json
@@ -392,6 +409,7 @@ def _convert_generic_json_to_vllm_format(
         add_model_name: bool,
         add_stream: bool,
         model_name: str = "",
+        extra_inputs: Dict = {},
     ) -> Dict:
         (
             system_role_headers,
@@ -407,6 +425,7 @@ def _convert_generic_json_to_vllm_format(
             add_model_name,
             add_stream,
             model_name,
+            extra_inputs,
         )
 
         return pa_json
@@ -418,6 +437,7 @@ def _convert_generic_json_to_trtllm_format(
         add_model_name: bool,
         add_stream: bool,
         model_name: str = "",
+        extra_inputs: Dict = {},
     ) -> Dict:
         (
             system_role_headers,
@@ -433,6 +453,7 @@ def _convert_generic_json_to_trtllm_format(
             add_model_name,
             add_stream,
             model_name,
+            extra_inputs,
         )
 
         return pa_json
@@ -480,6 +501,7 @@ def _populate_openai_chat_completions_output_json(
         add_model_name: bool,
         add_stream: bool,
         model_name: str = "",
+        extra_inputs: Dict = {},
     ) -> Dict:
         pa_json = LlmInputs._create_empty_openai_pa_json()
 
@@ -497,7 +519,7 @@ def _populate_openai_chat_completions_output_json(
             )
 
         pa_json = LlmInputs._add_optional_tags_to_openai_json(
-            pa_json, index, add_model_name, add_stream, model_name
+            pa_json, index, add_model_name, add_stream, model_name, extra_inputs
         )
 
         return pa_json
@@ -512,6 +534,7 @@ def _populate_openai_completions_output_json(
         add_model_name: bool,
         add_stream: bool,
         model_name: str = "",
+        extra_inputs: Dict = {},
     ) -> Dict:
         pa_json = LlmInputs._create_empty_openai_pa_json()
 
@@ -531,7 +554,7 @@ def _populate_openai_completions_output_json(
             pa_json = LlmInputs._add_new_prompt_to_json(pa_json, index, new_prompt)
 
         pa_json = LlmInputs._add_optional_tags_to_openai_json(
-            pa_json, index, add_model_name, add_stream, model_name
+            pa_json, index, add_model_name, add_stream, model_name, extra_inputs
         )
 
         return pa_json
@@ -546,6 +569,7 @@ def _populate_vllm_output_json(
         add_model_name: bool,
         add_stream: bool,
         model_name: str = "",
+        extra_inputs: Dict = {},
     ) -> Dict:
         pa_json = LlmInputs._create_empty_vllm_pa_json()
 
@@ -566,7 +590,7 @@ def _populate_vllm_output_json(
             )
 
         pa_json = LlmInputs._add_optional_tags_to_vllm_json(
-            pa_json, index, add_model_name, add_stream, model_name
+            pa_json, index, add_model_name, add_stream, model_name, extra_inputs
         )
 
         return pa_json
@@ -581,6 +605,7 @@ def _populate_trtllm_output_json(
         add_model_name: bool,
         add_stream: bool,
         model_name: str = "",
+        extra_inputs: Dict = {},
     ) -> Dict:
         pa_json = LlmInputs._create_empty_trtllm_pa_json()
 
@@ -602,7 +627,7 @@ def _populate_trtllm_output_json(
 
         pa_json = LlmInputs._add_required_tags_to_trtllm_json(pa_json, index)
         pa_json = LlmInputs._add_optional_tags_to_trtllm_json(
-            pa_json, index, add_model_name, add_stream, model_name
+            pa_json, index, add_model_name, add_stream, model_name, extra_inputs
         )
 
         return pa_json
@@ -737,11 +762,14 @@ def _add_optional_tags_to_openai_json(
         add_model_name: bool,
         add_stream: bool,
         model_name: str = "",
+        extra_inputs: Dict = {},
     ) -> Dict:
         if add_model_name:
             pa_json["data"][index]["payload"][0]["model"] = model_name
         if add_stream:
             pa_json["data"][index]["payload"][0]["stream"] = True
+        for key, value in extra_inputs.items():
+            pa_json["data"][index]["payload"][0][key] = value
 
         return pa_json
 
@@ -753,11 +781,14 @@ def _add_optional_tags_to_vllm_json(
         add_model_name: bool,
         add_stream: bool,
         model_name: str = "",
+        extra_inputs: Dict = {},
     ) -> Dict:
         if add_model_name:
             pa_json["data"][index]["model"] = model_name
         if add_stream:
             pa_json["data"][index]["stream"] = [True]
+        for key, value in extra_inputs.items():
+            pa_json["data"][index][key] = [value]
 
         return pa_json
 
@@ -769,11 +800,14 @@ def _add_optional_tags_to_trtllm_json(
         add_model_name: bool,
         add_stream: bool,
         model_name: str = "",
+        extra_inputs: Dict = {},
     ) -> Dict:
         if add_model_name:
             pa_json["data"][index]["model"] = model_name
         if add_stream:
             pa_json["data"][index]["stream"] = [True]
+        for key, value in extra_inputs.items():
+            pa_json["data"][index][key] = [value]
 
         return pa_json
```
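Note the asymmetry these loops introduce: OpenAI-style payloads receive each extra input as a raw value, while the vLLM and TRT-LLM formats wrap each value in a single-element list, matching their existing `stream: [True]` convention. A minimal standalone sketch of the two injection paths (the helper names here are illustrative, not genai-perf API):

```python
# Illustrative stand-ins for the two _add_optional_tags_* code paths above.
def add_extra_openai(payload: dict, extra_inputs: dict) -> dict:
    # OpenAI-style payloads take the raw value.
    for key, value in extra_inputs.items():
        payload[key] = value
    return payload


def add_extra_vllm_trtllm(entry: dict, extra_inputs: dict) -> dict:
    # vLLM/TRT-LLM entries wrap each value in a single-element list.
    for key, value in extra_inputs.items():
        entry[key] = [value]
    return entry


print(add_extra_openai({"model": "gpt2"}, {"max_tokens": 5}))
# {'model': 'gpt2', 'max_tokens': 5}
print(add_extra_vllm_trtllm({"model": "gpt2"}, {"max_tokens": 5}))
# {'model': 'gpt2', 'max_tokens': [5]}
```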

src/c++/perf_analyzer/genai-perf/genai_perf/main.py (+6)
```diff
@@ -45,6 +45,11 @@ def generate_inputs(args: ArgumentParser, tokenizer: AutoTokenizer) -> None:
     input_file_name = ""
     # TODO (TMA-1759): review if add_model_name is always true
     add_model_name = True
+    try:
+        extra_input_dict = parser.get_extra_inputs_as_dict(args)
+    except ValueError as e:
+        raise GenAIPerfException(e)
+
     LlmInputs.create_llm_inputs(
         input_type=args.prompt_source,
         output_format=args.output_format,
@@ -61,6 +66,7 @@ def generate_inputs(args: ArgumentParser, tokenizer: AutoTokenizer) -> None:
         add_model_name=add_model_name,
         add_stream=args.streaming,
         tokenizer=tokenizer,
+        extra_inputs=extra_input_dict,
     )
```
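The `try`/`except` surfaces malformed `--extra-inputs` values as a `GenAIPerfException` during input generation rather than deep inside request building. A hedged sketch of that wrapping, using a stand-in exception class and a stripped-down version of the colon check from parser.py:

```python
class GenAIPerfException(Exception):
    """Stand-in for genai_perf's exception type."""


def parse_extra_input(input_str: str) -> tuple:
    # Stripped-down version of the single-colon validation in parser.py.
    if input_str.count(":") != 1:
        raise ValueError(f"Invalid input format for --extra-inputs: {input_str}")
    name, value = input_str.split(":", 1)
    return name, value


try:
    parse_extra_input("max_tokens")  # malformed: missing ':value'
except ValueError as e:
    wrapped = GenAIPerfException(e)
    print(type(wrapped).__name__, "-", wrapped)
```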

src/c++/perf_analyzer/genai-perf/genai_perf/parser.py (+51, -5)
```diff
@@ -55,8 +55,8 @@ def _check_conditional_args(
     elif args.endpoint == "v1/completions":
         args.output_format = OutputFormat.OPENAI_COMPLETIONS
     elif args.endpoint is not None:
-        logger.warning(
-            "The --endpoint option is ignored when not using the 'openai' service-kind."
+        parser.error(
+            "The --endpoint option should only be used when using the 'openai' service-kind."
         )
     if args.service_kind == "triton":
         args = _convert_str_to_enum_entry(args, "backend", OutputFormat)
@@ -113,6 +113,13 @@ def handler(args, extra_args):
 def _add_input_args(parser):
     input_group = parser.add_argument_group("Input")
 
+    input_group.add_argument(
+        "--extra-inputs",
+        action="append",
+        help="Provide additional inputs to include with every request. "
+        "You can repeat this flag for multiple inputs. Inputs should be in an input_name:value format.",
+    )
+
     input_group.add_argument(
         "--input-dataset",
         type=str.lower,
@@ -242,9 +249,8 @@ def _add_endpoint_args(parser):
         type=str,
         choices=["v1/chat/completions", "v1/completions"],
         required=False,
-        help="The endpoint to send requests to on the "
-        'server. This is required when using the "openai" service-kind. '
-        "This is ignored in other cases.",
+        help=f"The endpoint to send requests to on the "
+        'server. This is only used with the "openai" service-kind. ',
     )
 
     endpoint_group.add_argument(
@@ -318,6 +324,46 @@ def _add_other_args(parser):
     )
 
 
+def get_extra_inputs_as_dict(args: argparse.ArgumentParser) -> dict:
+    request_inputs = {}
+    if hasattr(args, "extra_inputs"):
+        for input_str in args.extra_inputs:
+            colon_count = input_str.count(":")
+            if colon_count != 1:
+                raise ValueError(
+                    f"Invalid input format for --extra-inputs: {input_str}\n"
+                    "Expected input format: 'input_name:value'"
+                )
+            input_name, value = input_str.split(":", 1)
+
+            if not input_name or not value:
+                raise ValueError(
+                    f"Input name or value is empty in --extra-inputs: {input_str}\n"
+                    "Expected input format: 'input_name:value'"
+                )
+
+            is_bool = value.lower() in ["true", "false"]
+            is_int = value.isdigit()
+            is_float = value.count(".") == 1 and (
+                value[0] == "." or value.replace(".", "").isdigit()
+            )
+
+            if is_bool:
+                value = value.lower() == "true"
+            elif is_int:
+                value = int(value)
+            elif is_float:
+                value = float(value)
+
+            if input_name in request_inputs:
+                raise ValueError(
+                    f"Input name already exists in request_inputs dictionary: {input_name}"
+                )
+            request_inputs[input_name] = value
+
+    return request_inputs
+
+
 ### Entrypoint ###
```
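The helper also coerces each value by inspection: `true`/`false` in any case become booleans, digit strings become ints, and simple decimal strings become floats; anything else is kept as a string. A standalone sketch of just those coercion rules (mirroring, not importing, the function above):

```python
def coerce(value: str):
    # Mirrors the is_bool / is_int / is_float checks in get_extra_inputs_as_dict.
    if value.lower() in ["true", "false"]:
        return value.lower() == "true"
    if value.isdigit():
        return int(value)
    if value.count(".") == 1 and (value[0] == "." or value.replace(".", "").isdigit()):
        return float(value)
    return value


print(coerce("true"))       # True
print(coerce("5"))          # 5
print(coerce("0.5"))        # 0.5
print(coerce("my_string"))  # 'my_string'
```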

src/c++/perf_analyzer/genai-perf/genai_perf/wrapper.py (+1)
```diff
@@ -57,6 +57,7 @@ def build_cmd(args, extra_args):
         "input_format",
         "model",
         "backend",
+        "extra_inputs",
         "output_format",
         # The 'streaming' passed in to this script is to determine if the
         # LLM response should be streaming. That is different than the
```
