
Allow extra request inputs #552


Merged (15 commits, Apr 3, 2024)
@@ -86,6 +86,7 @@ def create_llm_inputs(
add_model_name: bool = False,
add_stream: bool = False,
tokenizer: AutoTokenizer = DEFAULT_TOKENIZER,
extra_inputs: Dict = {},
) -> Dict:
"""
Given an input type, input format, and output type, output a string of LLM Inputs
@@ -109,9 +110,11 @@ def create_llm_inputs(
length:
Number of entries to gather
add_model_name:
If true adds a model name field to each payload
If true, adds a model name field to each payload
add_stream:
If true adds a stream field to each payload
If true, adds a stream field to each payload
extra_inputs:
If provided, append these inputs to every request

Required Synthetic Prompt Generation Parameters
-----------------------------------------------
@@ -164,7 +167,12 @@ def create_llm_inputs(
)

json_in_pa_format = LlmInputs._convert_generic_json_to_output_format(
output_format, generic_dataset_json, add_model_name, add_stream, model_name
output_format,
generic_dataset_json,
add_model_name,
add_stream,
model_name,
extra_inputs,
)
LlmInputs._write_json_to_file(json_in_pa_format)
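As a quick orientation, here is a minimal sketch of calling the updated entry point with the new parameter. The argument names mirror this diff and the test added at the bottom of the PR; the import path and the extra_inputs keys (max_tokens, ignore_eos) are assumptions for illustration. Note that the Dict = {} defaults threaded through below are shared mutable defaults; that is harmless here because they are only ever read.

```python
# Sketch only: import path assumed from this repo's layout.
from genai_perf.llm_inputs.llm_inputs import LlmInputs, OutputFormat, PromptSource

# Generate synthetic inputs and attach two illustrative extra fields to
# every request; tokenizer is omitted so it falls back to DEFAULT_TOKENIZER.
pa_json = LlmInputs.create_llm_inputs(
    input_type=PromptSource.SYNTHETIC,
    output_format=OutputFormat.OPENAI_CHAT_COMPLETIONS,
    num_of_output_prompts=5,
    add_model_name=False,
    add_stream=True,
    extra_inputs={"max_tokens": 256, "ignore_eos": True},
)
```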

@@ -309,24 +317,29 @@ def _convert_generic_json_to_output_format(
add_model_name: bool,
add_stream: bool,
model_name: str = "",
extra_inputs: Dict = {},
) -> Dict:
if output_format == OutputFormat.OPENAI_CHAT_COMPLETIONS:
output_json = (
LlmInputs._convert_generic_json_to_openai_chat_completions_format(
generic_dataset, add_model_name, add_stream, model_name
generic_dataset,
add_model_name,
add_stream,
model_name,
extra_inputs,
)
)
elif output_format == OutputFormat.OPENAI_COMPLETIONS:
output_json = LlmInputs._convert_generic_json_to_openai_completions_format(
generic_dataset, add_model_name, add_stream, model_name
generic_dataset, add_model_name, add_stream, model_name, extra_inputs
)
elif output_format == OutputFormat.VLLM:
output_json = LlmInputs._convert_generic_json_to_vllm_format(
generic_dataset, add_model_name, add_stream, model_name
generic_dataset, add_model_name, add_stream, model_name, extra_inputs
)
elif output_format == OutputFormat.TRTLLM:
output_json = LlmInputs._convert_generic_json_to_trtllm_format(
generic_dataset, add_model_name, add_stream, model_name
generic_dataset, add_model_name, add_stream, model_name, extra_inputs
)
else:
raise GenAIPerfException(
@@ -342,6 +355,7 @@ def _convert_generic_json_to_openai_chat_completions_format(
add_model_name: bool,
add_stream: bool,
model_name: str = "",
extra_inputs: Dict = {},
) -> Dict:
# TODO (TMA-1757): Implement a way to select a role for `text_input`
(
@@ -356,6 +370,7 @@ def _convert_generic_json_to_openai_chat_completions_format(
add_model_name,
add_stream,
model_name,
extra_inputs,
)

return pa_json
@@ -367,6 +382,7 @@ def _convert_generic_json_to_openai_completions_format(
add_model_name: bool,
add_stream: bool,
model_name: str = "",
extra_inputs: Dict = {},
) -> Dict:
(
system_role_headers,
@@ -381,6 +397,7 @@ def _convert_generic_json_to_openai_completions_format(
add_model_name,
add_stream,
model_name,
extra_inputs,
)

return pa_json
@@ -392,6 +409,7 @@ def _convert_generic_json_to_vllm_format(
add_model_name: bool,
add_stream: bool,
model_name: str = "",
extra_inputs: Dict = {},
) -> Dict:
(
system_role_headers,
@@ -407,6 +425,7 @@ def _convert_generic_json_to_vllm_format(
add_model_name,
add_stream,
model_name,
extra_inputs,
)

return pa_json
@@ -418,6 +437,7 @@ def _convert_generic_json_to_trtllm_format(
add_model_name: bool,
add_stream: bool,
model_name: str = "",
extra_inputs: Dict = {},
) -> Dict:
(
system_role_headers,
@@ -433,6 +453,7 @@ def _convert_generic_json_to_trtllm_format(
add_model_name,
add_stream,
model_name,
extra_inputs,
)

return pa_json
@@ -480,6 +501,7 @@ def _populate_openai_chat_completions_output_json(
add_model_name: bool,
add_stream: bool,
model_name: str = "",
extra_inputs: Dict = {},
) -> Dict:
pa_json = LlmInputs._create_empty_openai_pa_json()

@@ -497,7 +519,7 @@
)

pa_json = LlmInputs._add_optional_tags_to_openai_json(
pa_json, index, add_model_name, add_stream, model_name
pa_json, index, add_model_name, add_stream, model_name, extra_inputs
)

return pa_json
@@ -512,6 +534,7 @@ def _populate_openai_completions_output_json(
add_model_name: bool,
add_stream: bool,
model_name: str = "",
extra_inputs: Dict = {},
) -> Dict:
pa_json = LlmInputs._create_empty_openai_pa_json()

@@ -531,7 +554,7 @@
pa_json = LlmInputs._add_new_prompt_to_json(pa_json, index, new_prompt)

pa_json = LlmInputs._add_optional_tags_to_openai_json(
pa_json, index, add_model_name, add_stream, model_name
pa_json, index, add_model_name, add_stream, model_name, extra_inputs
)

return pa_json
@@ -546,6 +569,7 @@ def _populate_vllm_output_json(
add_model_name: bool,
add_stream: bool,
model_name: str = "",
extra_inputs: Dict = {},
) -> Dict:
pa_json = LlmInputs._create_empty_vllm_pa_json()

@@ -566,7 +590,7 @@
)

pa_json = LlmInputs._add_optional_tags_to_vllm_json(
pa_json, index, add_model_name, add_stream, model_name
pa_json, index, add_model_name, add_stream, model_name, extra_inputs
)

return pa_json
@@ -581,6 +605,7 @@ def _populate_trtllm_output_json(
add_model_name: bool,
add_stream: bool,
model_name: str = "",
extra_inputs: Dict = {},
) -> Dict:
pa_json = LlmInputs._create_empty_trtllm_pa_json()

@@ -602,7 +627,7 @@

pa_json = LlmInputs._add_required_tags_to_trtllm_json(pa_json, index)
pa_json = LlmInputs._add_optional_tags_to_trtllm_json(
pa_json, index, add_model_name, add_stream, model_name
pa_json, index, add_model_name, add_stream, model_name, extra_inputs
)

return pa_json
@@ -737,11 +762,14 @@ def _add_optional_tags_to_openai_json(
add_model_name: bool,
add_stream: bool,
model_name: str = "",
extra_inputs: Dict = {},
) -> Dict:
if add_model_name:
pa_json["data"][index]["payload"][0]["model"] = model_name
if add_stream:
pa_json["data"][index]["payload"][0]["stream"] = True
for key, value in extra_inputs.items():
pa_json["data"][index]["payload"][0][key] = value

return pa_json
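To make the merge point concrete: for the OpenAI formats, the optional tags land inside payload[0] of each data entry. A hedged sketch of one resulting entry follows; the surrounding fields are illustrative, since the actual skeleton comes from _create_empty_openai_pa_json, which is not part of this diff.

```python
# Shape of pa_json["data"][index] after _add_optional_tags_to_openai_json,
# given add_stream=True and extra_inputs={"max_tokens": 256}:
entry = {
    "payload": [
        {
            "messages": [{"role": "user", "content": "..."}],  # illustrative
            "stream": True,      # from add_stream
            "max_tokens": 256,   # merged in from extra_inputs
        }
    ]
}
```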

@@ -753,11 +781,14 @@ def _add_optional_tags_to_vllm_json(
add_model_name: bool,
add_stream: bool,
model_name: str = "",
extra_inputs: Dict = {},
) -> Dict:
if add_model_name:
pa_json["data"][index]["model"] = model_name
if add_stream:
pa_json["data"][index]["stream"] = [True]
for key, value in extra_inputs.items():
pa_json["data"][index][key] = value

return pa_json

@@ -769,11 +800,14 @@ def _add_optional_tags_to_trtllm_json(
add_model_name: bool,
add_stream: bool,
model_name: str = "",
extra_inputs: Dict = {},
) -> Dict:
if add_model_name:
pa_json["data"][index]["model"] = model_name
if add_stream:
pa_json["data"][index]["stream"] = [True]
for key, value in extra_inputs.items():
pa_json["data"][index][key] = value

return pa_json
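By contrast, the vLLM and TRT-LLM variants attach the same tags at the top level of each data entry, with stream wrapped in a list as the code above shows. A hedged sketch of one entry; the text_input field is assumed from the docstring TODO earlier in this file:

```python
# Shape of pa_json["data"][index] for the VLLM / TRTLLM output formats,
# given add_stream=True and extra_inputs={"max_tokens": 256}:
entry = {
    "text_input": ["..."],  # illustrative; populated earlier in the pipeline
    "stream": [True],       # list-wrapped here, unlike the OpenAI formats
    "max_tokens": 256,      # merged from extra_inputs at the entry level
}
```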

1 change: 1 addition & 0 deletions src/c++/perf_analyzer/genai-perf/genai_perf/main.py
@@ -61,6 +61,7 @@ def generate_inputs(args: ArgumentParser, tokenizer: AutoTokenizer) -> None:
add_model_name=add_model_name,
add_stream=args.streaming,
tokenizer=tokenizer,
extra_inputs=parser.get_extra_inputs_as_dict(args),
)


22 changes: 22 additions & 0 deletions src/c++/perf_analyzer/genai-perf/genai_perf/parser.py
@@ -147,6 +147,13 @@ def _add_input_args(parser):
help="The seed used to generate random values.",
)

parser.add_argument(
"--synthetic-extra-inputs",
action="append",
help="Provide additional inputs to include with every request when prompt-source is synthetic. "
"You can repeat this flag for multiple inputs. Inputs should be in a key:value format.",
)
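Because the flag uses action="append", repeating it accumulates raw "key:value" strings on the namespace. A minimal standalone sketch of that behavior (the parser construction here is illustrative, not genai-perf's own setup):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--synthetic-extra-inputs", action="append")

# Each repetition appends another raw "key:value" string.
args = parser.parse_args(
    [
        "--synthetic-extra-inputs", "max_tokens:256",
        "--synthetic-extra-inputs", "temperature:0.7",
    ]
)
print(args.synthetic_extra_inputs)  # ['max_tokens:256', 'temperature:0.7']
```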

input_group.add_argument(
"--synthetic-requested-output-tokens",
type=int,
@@ -318,6 +325,21 @@ def _add_other_args(parser):
)


def get_extra_inputs_as_dict(args: argparse.Namespace) -> dict:
request_inputs = {}
# argparse's action="append" leaves this attribute as None when the flag
# is never passed, so guard before iterating.
if getattr(args, "synthetic_extra_inputs", None):
for input_str in args.synthetic_extra_inputs:
try:
key, value = input_str.split(":", 1)
request_inputs[key] = value
except ValueError:
raise ValueError(
f"Invalid input format for --synthetic-extra-inputs: {input_str}. "
"Expected input format: 'key:value'"
)
return request_inputs
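Worth noting: the helper keeps parsed values as strings ("5" stays "5", which the CLI test below also expects), leaving any numeric coercion to the caller. A hedged round trip, assuming get_extra_inputs_as_dict from the diff above is in scope:

```python
from types import SimpleNamespace

# Stand-in for a parsed argparse namespace; the attribute name matches the flag.
args = SimpleNamespace(synthetic_extra_inputs=["max_tokens:256", "temperature:0.7"])
print(get_extra_inputs_as_dict(args))
# {'max_tokens': '256', 'temperature': '0.7'}
```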


### Entrypoint ###


1 change: 1 addition & 0 deletions src/c++/perf_analyzer/genai-perf/genai_perf/wrapper.py
@@ -57,6 +57,7 @@ def build_cmd(args, extra_args):
"input_format",
"model",
"backend",
"extra_inputs",
"output_format",
# The 'streaming' passed in to this script is to determine if the
# LLM response should be streaming. That is different than the
13 changes: 13 additions & 0 deletions src/c++/perf_analyzer/genai-perf/tests/test_cli.py
@@ -75,6 +75,19 @@ def test_help_version_arguments_output_and_exit(
["--endpoint", "v1/chat/completions"],
{"endpoint": "v1/chat/completions"},
),
(
["--synthetic-extra-inputs", "random_key:random_value"],
{"synthetic_extra_inputs": ["random_key:random_value"]},
),
(
[
"--synthetic-extra-inputs",
"random_key:5",
"--synthetic-extra-inputs",
"another_random_key:6",
],
{"synthetic_extra_inputs": ["random_key:5", "another_random_key:6"]},
),
(
["--synthetic-requested-output-tokens", "5"],
{"synthetic_requested_output_tokens": 5},
57 changes: 57 additions & 0 deletions src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs.py
@@ -358,3 +358,60 @@ def test_synthetic_to_openai_completions(self, default_tokenizer):

assert pa_json is not None
assert len(pa_json["data"]) == 5

@pytest.mark.parametrize(
"output_format",
[
(OutputFormat.OPENAI_CHAT_COMPLETIONS),
(OutputFormat.OPENAI_COMPLETIONS),
(OutputFormat.TRTLLM),
(OutputFormat.VLLM),
],
)
def test_llm_inputs_extra_inputs(self, default_tokenizer, output_format) -> None:
# Simulate --synthetic-extra-inputs arguments
request_inputs = {"additional_key": "additional_value"}

# Generate input data with the additional request inputs
pa_json = LlmInputs.create_llm_inputs(
input_type=PromptSource.SYNTHETIC,
output_format=output_format,
num_of_output_prompts=5, # Generate a small number of prompts for the test
add_model_name=False,
add_stream=True,
tokenizer=default_tokenizer,
extra_inputs=request_inputs, # Pass the simulated --synthetic-extra-inputs arguments here
)

assert len(pa_json["data"]) == 5

if (
output_format == OutputFormat.OPENAI_CHAT_COMPLETIONS
or output_format == OutputFormat.OPENAI_COMPLETIONS
):
# Verify that each entry in the generated JSON includes the additional key-value pairs
for entry in pa_json.get("data", []):
assert "payload" in entry, "Payload is missing in the request"
payload = entry["payload"]
for item in payload:
assert (
"additional_key" in item
), "The additional_key is not present in the request"
assert (
item["additional_key"] == "additional_value"
), "The value of additional_key is incorrect"
elif output_format == OutputFormat.TRTLLM or output_format == OutputFormat.VLLM:
# Verify that each entry in the generated JSON includes the additional key-value pairs
for entry in pa_json.get("data", []):
assert (
"additional_key" in entry
), "The additional_key is not present in the request"
assert (
entry["additional_key"] == "additional_value"
), "The value of additional_key is incorrect"
else:
assert False, f"Unsupported output format: {output_format}"

print(
"Test passed: --synthetic-extra-inputs key:value is correctly added to every request in input-data.json"
)