From cbe52eec8f10e42217edb5948ac083f1a2796a58 Mon Sep 17 00:00:00 2001 From: David Yastremsky Date: Mon, 25 Mar 2024 16:06:32 -0700 Subject: [PATCH 01/14] Add extra_args --- .../genai_perf/llm_inputs/llm_inputs.py | 7 ++++++ .../genai-perf/genai_perf/main.py | 1 + .../genai-perf/genai_perf/parser.py | 23 +++++++++++++++++++ .../genai-perf/genai_perf/wrapper.py | 2 +- 4 files changed, 32 insertions(+), 1 deletion(-) diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/llm_inputs.py b/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/llm_inputs.py index 154b88a4b..812829db8 100644 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/llm_inputs.py +++ b/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/llm_inputs.py @@ -86,6 +86,7 @@ def create_llm_inputs( add_model_name: bool = False, add_stream: bool = False, tokenizer: AutoTokenizer = DEFAULT_TOKENIZER, + extra_inputs: Dict = {}, ) -> Dict: """ Given an input type, input format, and output type. Output a string of LLM Inputs @@ -112,6 +113,8 @@ def create_llm_inputs( If true adds a model name field to each payload add_stream: If true adds a steam field to each payload + extra_inputs: + If provided, append these inputs to every request Required Synthetic Prompt Generation Parameters ----------------------------------------------- @@ -163,6 +166,10 @@ def create_llm_inputs( "Using a file to supply LLM Input is not supported at this time" ) + if extra_inputs: + for entry in generic_dataset_json["rows"]: + entry["row"].update(extra_inputs) + json_in_pa_format = LlmInputs._convert_generic_json_to_output_format( output_format, generic_dataset_json, add_model_name, add_stream, model_name ) diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/main.py b/src/c++/perf_analyzer/genai-perf/genai_perf/main.py index 05fdb2340..8efdbc0ec 100755 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/main.py +++ b/src/c++/perf_analyzer/genai-perf/genai_perf/main.py @@ -61,6 +61,7 @@ def generate_inputs(args: ArgumentParser, tokenizer: AutoTokenizer) -> None: add_model_name=add_model_name, add_stream=args.streaming, tokenizer=tokenizer, + extra_inputs=parser.get_extra_inputs_as_dict(args), ) diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py b/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py index 4f60cac44..4225b3b95 100644 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py +++ b/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py @@ -171,6 +171,14 @@ def _add_input_args(parser): help=f"The standard deviation of number of tokens in the generated prompts when prompt-source is synthetic.", ) + parser.add_argument( + "--extra-inputs", + action="append", + help="Provide additional inputs to include with every request. " + "You can repeat this flag for multiple inputs. 
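To make the new flow concrete, here is a small self-contained sketch of what this commit wires together: repeated "--extra-inputs key:value" flags are collected by argparse, split into a dictionary, and merged into every row of the generic dataset. The key and value names are placeholders (taken from the test suite), not real backend parameters, and the row contents are invented for illustration.

    import argparse

    sketch_parser = argparse.ArgumentParser()
    sketch_parser.add_argument("--extra-inputs", action="append", default=[])
    ns = sketch_parser.parse_args(
        ["--extra-inputs", "test_key:test_value", "--extra-inputs", "another_test_key:6"]
    )

    extra_inputs = {}
    for item in ns.extra_inputs:
        key, value = item.split(":", 1)  # split on the first ":" only
        extra_inputs[key] = value

    generic_dataset_json = {"rows": [{"row": {"text_input": "hello"}}]}
    for entry in generic_dataset_json["rows"]:
        entry["row"].update(extra_inputs)

    print(generic_dataset_json["rows"][0]["row"])
    # {'text_input': 'hello', 'test_key': 'test_value', 'another_test_key': '6'}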
" + "Inputs should be in a key:value format.", + ) + def _add_profile_args(parser): profile_group = parser.add_argument_group("Profiling") @@ -318,6 +326,21 @@ def _add_other_args(parser): ) +def get_extra_inputs_as_dict(args: argparse.ArgumentParser) -> dict: + request_inputs = {} + if hasattr(args, "extra_inputs"): + for input_str in args.extra_inputs: + try: + key, value = input_str.split(":", 1) + request_inputs[key] = value + except ValueError: + args.error( + f"Invalid input format for --extra-inputs: {input_str}" + "Expected input format: 'key:value'" + ) + return request_inputs + + ### Entrypoint ### diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/wrapper.py b/src/c++/perf_analyzer/genai-perf/genai_perf/wrapper.py index 4eeaaa056..e1c5d2536 100644 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/wrapper.py +++ b/src/c++/perf_analyzer/genai-perf/genai_perf/wrapper.py @@ -57,7 +57,7 @@ def build_cmd(args, extra_args): "input_format", "model", "backend", - "output_format", + "extra_inputs" "output_format", # The 'streaming' passed in to this script is to determine if the # LLM response should be streaming. That is different than the # 'streaming' that PA takes, which means something else (and is From 774070a3d0588534720720f58caf5531148d9233 Mon Sep 17 00:00:00 2001 From: David Yastremsky Date: Wed, 27 Mar 2024 10:28:27 -0700 Subject: [PATCH 02/14] Test extra_args --- .../genai_perf/llm_inputs/llm_inputs.py | 57 +- .../genai-perf/tests/test_llm_inputs.py | 679 ++++++++++-------- 2 files changed, 410 insertions(+), 326 deletions(-) diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/llm_inputs.py b/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/llm_inputs.py index 812829db8..2d648fb52 100644 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/llm_inputs.py +++ b/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/llm_inputs.py @@ -110,9 +110,9 @@ def create_llm_inputs( length: Number of entries to gather add_model_name: - If true adds a model name field to each payload + If true, adds a model name field to each payload add_stream: - If true adds a steam field to each payload + If true, adds a steam field to each payload extra_inputs: If provided, append these inputs to every request @@ -166,12 +166,13 @@ def create_llm_inputs( "Using a file to supply LLM Input is not supported at this time" ) - if extra_inputs: - for entry in generic_dataset_json["rows"]: - entry["row"].update(extra_inputs) - json_in_pa_format = LlmInputs._convert_generic_json_to_output_format( - output_format, generic_dataset_json, add_model_name, add_stream, model_name + output_format, + generic_dataset_json, + add_model_name, + add_stream, + model_name, + extra_inputs, ) LlmInputs._write_json_to_file(json_in_pa_format) @@ -316,24 +317,29 @@ def _convert_generic_json_to_output_format( add_model_name: bool, add_stream: bool, model_name: str = "", + extra_inputs: Dict = {}, ) -> Dict: if output_format == OutputFormat.OPENAI_CHAT_COMPLETIONS: output_json = ( LlmInputs._convert_generic_json_to_openai_chat_completions_format( - generic_dataset, add_model_name, add_stream, model_name + generic_dataset, + add_model_name, + add_stream, + model_name, + extra_inputs, ) ) elif output_format == OutputFormat.OPENAI_COMPLETIONS: output_json = LlmInputs._convert_generic_json_to_openai_completions_format( - generic_dataset, add_model_name, add_stream, model_name + generic_dataset, add_model_name, add_stream, model_name, extra_inputs ) elif output_format == OutputFormat.VLLM: output_json = 
LlmInputs._convert_generic_json_to_vllm_format( - generic_dataset, add_model_name, add_stream, model_name + generic_dataset, add_model_name, add_stream, model_name, extra_inputs ) elif output_format == OutputFormat.TRTLLM: output_json = LlmInputs._convert_generic_json_to_trtllm_format( - generic_dataset, add_model_name, add_stream, model_name + generic_dataset, add_model_name, add_stream, model_name, extra_inputs ) else: raise GenAIPerfException( @@ -349,6 +355,7 @@ def _convert_generic_json_to_openai_chat_completions_format( add_model_name: bool, add_stream: bool, model_name: str = "", + extra_inputs: Dict = {}, ) -> Dict: # TODO (TMA-1757): Implement a way to select a role for `text_input` ( @@ -363,6 +370,7 @@ def _convert_generic_json_to_openai_chat_completions_format( add_model_name, add_stream, model_name, + extra_inputs, ) return pa_json @@ -374,6 +382,7 @@ def _convert_generic_json_to_openai_completions_format( add_model_name: bool, add_stream: bool, model_name: str = "", + extra_inputs: Dict = {}, ) -> Dict: ( system_role_headers, @@ -388,6 +397,7 @@ def _convert_generic_json_to_openai_completions_format( add_model_name, add_stream, model_name, + extra_inputs, ) return pa_json @@ -399,6 +409,7 @@ def _convert_generic_json_to_vllm_format( add_model_name: bool, add_stream: bool, model_name: str = "", + extra_inputs: Dict = {}, ) -> Dict: ( system_role_headers, @@ -414,6 +425,7 @@ def _convert_generic_json_to_vllm_format( add_model_name, add_stream, model_name, + extra_inputs, ) return pa_json @@ -425,6 +437,7 @@ def _convert_generic_json_to_trtllm_format( add_model_name: bool, add_stream: bool, model_name: str = "", + extra_inputs: Dict = {}, ) -> Dict: ( system_role_headers, @@ -440,6 +453,7 @@ def _convert_generic_json_to_trtllm_format( add_model_name, add_stream, model_name, + extra_inputs, ) return pa_json @@ -487,6 +501,7 @@ def _populate_openai_chat_completions_output_json( add_model_name: bool, add_stream: bool, model_name: str = "", + extra_inputs: Dict = {}, ) -> Dict: pa_json = LlmInputs._create_empty_openai_pa_json() @@ -504,7 +519,7 @@ def _populate_openai_chat_completions_output_json( ) pa_json = LlmInputs._add_optional_tags_to_openai_json( - pa_json, index, add_model_name, add_stream, model_name + pa_json, index, add_model_name, add_stream, model_name, extra_inputs ) return pa_json @@ -519,6 +534,7 @@ def _populate_openai_completions_output_json( add_model_name: bool, add_stream: bool, model_name: str = "", + extra_inputs: Dict = {}, ) -> Dict: pa_json = LlmInputs._create_empty_openai_pa_json() @@ -538,7 +554,7 @@ def _populate_openai_completions_output_json( pa_json = LlmInputs._add_new_prompt_to_json(pa_json, index, new_prompt) pa_json = LlmInputs._add_optional_tags_to_openai_json( - pa_json, index, add_model_name, add_stream, model_name + pa_json, index, add_model_name, add_stream, model_name, extra_inputs ) return pa_json @@ -553,6 +569,7 @@ def _populate_vllm_output_json( add_model_name: bool, add_stream: bool, model_name: str = "", + extra_inputs: Dict = {}, ) -> Dict: pa_json = LlmInputs._create_empty_vllm_pa_json() @@ -573,7 +590,7 @@ def _populate_vllm_output_json( ) pa_json = LlmInputs._add_optional_tags_to_vllm_json( - pa_json, index, add_model_name, add_stream, model_name + pa_json, index, add_model_name, add_stream, model_name, extra_inputs ) return pa_json @@ -588,6 +605,7 @@ def _populate_trtllm_output_json( add_model_name: bool, add_stream: bool, model_name: str = "", + extra_inputs: Dict = {}, ) -> Dict: pa_json = 
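One detail of the signatures above worth noting: extra_inputs defaults to a mutable dict ({}). The helpers only read from it, so nothing breaks here, but a quick sketch of why shared mutable defaults can surprise, and the usual Optional[...] alternative, may help anyone extending these functions later.

    from typing import Dict, Optional

    def buggy(value, acc: Dict = {}):  # the same dict object is reused on every call
        acc["last"] = value
        return acc

    assert buggy(1) is buggy(2)  # both calls returned the shared default dict

    def safer(value, acc: Optional[Dict] = None):  # fresh dict per call unless provided
        acc = {} if acc is None else acc
        acc["last"] = value
        return acc

    assert safer(1) is not safer(2)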
LlmInputs._create_empty_trtllm_pa_json() @@ -609,7 +627,7 @@ def _populate_trtllm_output_json( pa_json = LlmInputs._add_required_tags_to_trtllm_json(pa_json, index) pa_json = LlmInputs._add_optional_tags_to_trtllm_json( - pa_json, index, add_model_name, add_stream, model_name + pa_json, index, add_model_name, add_stream, model_name, extra_inputs ) return pa_json @@ -744,11 +762,14 @@ def _add_optional_tags_to_openai_json( add_model_name: bool, add_stream: bool, model_name: str = "", + extra_inputs: Dict = {}, ) -> Dict: if add_model_name: pa_json["data"][index]["payload"][0]["model"] = model_name if add_stream: pa_json["data"][index]["payload"][0]["stream"] = True + for key, value in extra_inputs.items(): + pa_json["data"][index]["payload"][0][key] = value return pa_json @@ -760,11 +781,14 @@ def _add_optional_tags_to_vllm_json( add_model_name: bool, add_stream: bool, model_name: str = "", + extra_inputs: Dict = {}, ) -> Dict: if add_model_name: pa_json["data"][index]["model"] = model_name if add_stream: pa_json["data"][index]["stream"] = [True] + for key, value in extra_inputs.items(): + pa_json["data"][index][key] = value return pa_json @@ -776,11 +800,14 @@ def _add_optional_tags_to_trtllm_json( add_model_name: bool, add_stream: bool, model_name: str = "", + extra_inputs: Dict = {}, ) -> Dict: if add_model_name: pa_json["data"][index]["model"] = model_name if add_stream: pa_json["data"][index]["stream"] = [True] + for key, value in extra_inputs.items(): + pa_json["data"][index][key] = value return pa_json diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs.py b/src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs.py index 8dcf212c2..85b3f0e9d 100644 --- a/src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs.py +++ b/src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs.py @@ -38,323 +38,380 @@ def default_configured_url(self): def default_tokenizer(self): yield tokenizer.get_tokenizer(tokenizer.DEFAULT_TOKENIZER) - def test_input_type_url_no_dataset_name(self): - """ - Test for exception when input type is URL and no dataset name - """ - with pytest.raises(GenAIPerfException): - _ = LlmInputs._check_for_dataset_name_if_input_type_is_url( - input_type=PromptSource.DATASET, dataset_name="" - ) - - def test_input_type_synthetic_no_tokenizer(self): - """ - Test for exception when input type is SYNTHETIC and no tokenizer - """ - with pytest.raises(GenAIPerfException): - _ = LlmInputs._check_for_tokenzier_if_input_type_is_synthetic( - input_type=PromptSource.SYNTHETIC, tokenizer=None - ) - - def test_illegal_starting_index(self): - """ - Test for exceptions when illegal values are given for starting index - """ - with pytest.raises(GenAIPerfException): - _ = LlmInputs._check_for_valid_starting_index(starting_index="foo") - - with pytest.raises(GenAIPerfException): - _ = LlmInputs._check_for_valid_starting_index(starting_index=-1) - - def test_illegal_length(self): - """ - Test for exceptions when illegal values are given for length - """ - with pytest.raises(GenAIPerfException): - _ = LlmInputs._check_for_valid_length(length="foo") - - with pytest.raises(GenAIPerfException): - _ = LlmInputs._check_for_valid_length(length=0) - - def test_create_configured_url(self): - """ - Test that we are appending and configuring the URL correctly - """ - expected_configured_url = ( - "http://test-url.com" - + f"&offset={LlmInputs.DEFAULT_STARTING_INDEX}" - + f"&length={LlmInputs.DEFAULT_LENGTH}" - ) - configured_url = LlmInputs._create_configured_url( - "http://test-url.com", - 
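Putting the _add_optional_tags_to_* helpers side by side: for the OpenAI formats the extra key/value pairs are written into payload[0] as plain scalars, while for vLLM and TRT-LLM they are written directly onto the data entry, next to fields like stream that use the [True] list encoding. Roughly, with extra_inputs={"additional_key": "additional_value"} and add_stream=True (prompt fields omitted from this sketch):

    openai_entry = {
        "payload": [
            {"stream": True, "additional_key": "additional_value"},
        ]
    }
    vllm_or_trtllm_entry = {
        "stream": [True],
        "additional_key": "additional_value",
    }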
LlmInputs.DEFAULT_STARTING_INDEX, - LlmInputs.DEFAULT_LENGTH, - ) - - assert configured_url == expected_configured_url - - def test_download_dataset_illegal_url(self): - """ - Test for exception when URL is bad - """ - with pytest.raises(GenAIPerfException): - _ = LlmInputs._download_dataset( - "https://bad-url.zzz", - LlmInputs.DEFAULT_STARTING_INDEX, - LlmInputs.DEFAULT_LENGTH, - ) - - def test_llm_inputs_error_in_server_response(self): - """ - Test for exception when length is out of range - """ - with pytest.raises(GenAIPerfException): - _ = LlmInputs.create_llm_inputs( - input_type=PromptSource.DATASET, - dataset_name=OPEN_ORCA, - output_format=OutputFormat.OPENAI_CHAT_COMPLETIONS, - starting_index=LlmInputs.DEFAULT_STARTING_INDEX, - length=int(LlmInputs.DEFAULT_LENGTH * 100), - ) - - def test_llm_inputs_with_defaults(self, default_configured_url): - """ - Test that default options work - """ - dataset = LlmInputs._download_dataset( - default_configured_url, - LlmInputs.DEFAULT_STARTING_INDEX, - LlmInputs.DEFAULT_LENGTH, - ) - dataset_json = LlmInputs._convert_input_url_dataset_to_generic_json( - dataset=dataset - ) - - assert dataset_json is not None - assert len(dataset_json["rows"]) == LlmInputs.DEFAULT_LENGTH - - def test_llm_inputs_with_non_default_length(self): - """ - Test that non-default length works - """ - configured_url = LlmInputs._create_configured_url( - LlmInputs.OPEN_ORCA_URL, - LlmInputs.DEFAULT_STARTING_INDEX, - (int(LlmInputs.DEFAULT_LENGTH / 2)), - ) - dataset = LlmInputs._download_dataset( - configured_url, - LlmInputs.DEFAULT_STARTING_INDEX, - length=(int(LlmInputs.DEFAULT_LENGTH / 2)), - ) - dataset_json = LlmInputs._convert_input_url_dataset_to_generic_json( - dataset=dataset - ) - - assert dataset_json is not None - assert len(dataset_json["rows"]) == LlmInputs.DEFAULT_LENGTH / 2 - - def test_convert_default_json_to_pa_format(self, default_configured_url): - """ - Test that conversion to PA JSON format is correct - """ - dataset = LlmInputs._download_dataset( - default_configured_url, - LlmInputs.DEFAULT_STARTING_INDEX, - LlmInputs.DEFAULT_LENGTH, - ) - dataset_json = LlmInputs._convert_input_url_dataset_to_generic_json( - dataset=dataset - ) - pa_json = LlmInputs._convert_generic_json_to_output_format( - output_format=OutputFormat.OPENAI_CHAT_COMPLETIONS, - generic_dataset=dataset_json, - add_model_name=False, - add_stream=False, - ) - - assert pa_json is not None - assert len(pa_json["data"]) == LlmInputs.DEFAULT_LENGTH - - def test_create_openai_llm_inputs_cnn_dailymail(self): - """ - Test CNN_DAILYMAIL can be accessed - """ - pa_json = LlmInputs.create_llm_inputs( - input_type=PromptSource.DATASET, - dataset_name=CNN_DAILY_MAIL, - output_format=OutputFormat.OPENAI_CHAT_COMPLETIONS, - ) - - os.remove(DEFAULT_INPUT_DATA_JSON) - - assert pa_json is not None - assert len(pa_json["data"]) == LlmInputs.DEFAULT_LENGTH - - def test_write_to_file(self): - """ - Test that write to file is working correctly - """ - pa_json = LlmInputs.create_llm_inputs( - input_type=PromptSource.DATASET, - dataset_name=OPEN_ORCA, - output_format=OutputFormat.OPENAI_CHAT_COMPLETIONS, - model_name="open_orca", - add_model_name=True, - add_stream=True, - ) - try: - f = open(DEFAULT_INPUT_DATA_JSON, "r") - json_str = f.read() - finally: - f.close() - os.remove(DEFAULT_INPUT_DATA_JSON) - - assert pa_json == json.loads(json_str) - - def test_create_openai_to_vllm(self): - """ - Test conversion of openai to vllm - """ - pa_json = LlmInputs.create_llm_inputs( - 
input_type=PromptSource.DATASET, - output_format=OutputFormat.VLLM, - dataset_name=OPEN_ORCA, - add_model_name=False, - add_stream=True, - ) - - os.remove(DEFAULT_INPUT_DATA_JSON) - - assert pa_json is not None - assert len(pa_json["data"]) == LlmInputs.DEFAULT_LENGTH - - def test_create_openai_to_completions(self): - """ - Test conversion of openai to completions - """ - pa_json = LlmInputs.create_llm_inputs( - input_type=PromptSource.DATASET, - output_format=OutputFormat.OPENAI_COMPLETIONS, - dataset_name=OPEN_ORCA, - add_model_name=False, - add_stream=True, - ) - - os.remove(DEFAULT_INPUT_DATA_JSON) - - assert pa_json is not None - assert len(pa_json["data"]) == LlmInputs.DEFAULT_LENGTH - - def test_create_openai_to_trtllm(self): - """ - Test conversion of openai to trtllm - """ - pa_json = LlmInputs.create_llm_inputs( - input_type=PromptSource.DATASET, - output_format=OutputFormat.TRTLLM, - dataset_name=OPEN_ORCA, - add_model_name=False, - add_stream=True, - ) - - os.remove(DEFAULT_INPUT_DATA_JSON) - - assert pa_json is not None - assert len(pa_json["data"]) == LlmInputs.DEFAULT_LENGTH - - def test_random_synthetic(self, default_tokenizer): - """ - Test that we can produce deterministic random synthetic prompts - """ - synthetic_prompt, synthetic_prompt_tokens = LlmInputs._create_synthetic_prompt( - default_tokenizer, - LlmInputs.DEFAULT_PROMPT_TOKENS_MEAN, - LlmInputs.DEFAULT_PROMPT_TOKENS_STDDEV, - LlmInputs.DEFAULT_REQUESTED_OUTPUT_TOKENS, - LlmInputs.DEFAULT_RANDOM_SEED, - ) - - # 550 is the num of tokens returned for the default seed - assert synthetic_prompt_tokens == 550 - - synthetic_prompt, synthetic_prompt_tokens = LlmInputs._create_synthetic_prompt( - default_tokenizer, - LlmInputs.DEFAULT_PROMPT_TOKENS_MEAN, - LlmInputs.DEFAULT_PROMPT_TOKENS_STDDEV + 250, - LlmInputs.DEFAULT_REQUESTED_OUTPUT_TOKENS, - LlmInputs.DEFAULT_RANDOM_SEED + 1, - ) - assert synthetic_prompt_tokens != 785 - - def test_synthetic_to_vllm(self, default_tokenizer): - """ - Test generating synthetic prompts and converting to vllm - """ - pa_json = LlmInputs.create_llm_inputs( - input_type=PromptSource.SYNTHETIC, - output_format=OutputFormat.VLLM, - num_of_output_prompts=5, - add_model_name=False, - add_stream=True, - tokenizer=default_tokenizer, - ) - - os.remove(DEFAULT_INPUT_DATA_JSON) - - assert pa_json is not None - assert len(pa_json["data"]) == 5 - - def test_synthetic_to_trtllm(self, default_tokenizer): - """ - Test generating synthetic prompts and converting to trtllm - """ - pa_json = LlmInputs.create_llm_inputs( - input_type=PromptSource.SYNTHETIC, - output_format=OutputFormat.TRTLLM, - num_of_output_prompts=5, - add_model_name=False, - add_stream=True, - tokenizer=default_tokenizer, - ) - - os.remove(DEFAULT_INPUT_DATA_JSON) - - assert pa_json is not None - assert len(pa_json["data"]) == 5 - - def test_synthetic_to_openai_chat_completions(self, default_tokenizer): - """ - Test generating synthetic prompts and converting to OpenAI chat completions - """ + # def test_input_type_url_no_dataset_name(self): + # """ + # Test for exception when input type is URL and no dataset name + # """ + # with pytest.raises(GenAIPerfException): + # _ = LlmInputs._check_for_dataset_name_if_input_type_is_url( + # input_type=PromptSource.DATASET, dataset_name="" + # ) + + # def test_input_type_synthetic_no_tokenizer(self): + # """ + # Test for exception when input type is SYNTHETIC and no tokenizer + # """ + # with pytest.raises(GenAIPerfException): + # _ = LlmInputs._check_for_tokenzier_if_input_type_is_synthetic( 
+ # input_type=PromptSource.SYNTHETIC, tokenizer=None + # ) + + # def test_illegal_starting_index(self): + # """ + # Test for exceptions when illegal values are given for starting index + # """ + # with pytest.raises(GenAIPerfException): + # _ = LlmInputs._check_for_valid_starting_index(starting_index="foo") + + # with pytest.raises(GenAIPerfException): + # _ = LlmInputs._check_for_valid_starting_index(starting_index=-1) + + # def test_illegal_length(self): + # """ + # Test for exceptions when illegal values are given for length + # """ + # with pytest.raises(GenAIPerfException): + # _ = LlmInputs._check_for_valid_length(length="foo") + + # with pytest.raises(GenAIPerfException): + # _ = LlmInputs._check_for_valid_length(length=0) + + # def test_create_configured_url(self): + # """ + # Test that we are appending and configuring the URL correctly + # """ + # expected_configured_url = ( + # "http://test-url.com" + # + f"&offset={LlmInputs.DEFAULT_STARTING_INDEX}" + # + f"&length={LlmInputs.DEFAULT_LENGTH}" + # ) + # configured_url = LlmInputs._create_configured_url( + # "http://test-url.com", + # LlmInputs.DEFAULT_STARTING_INDEX, + # LlmInputs.DEFAULT_LENGTH, + # ) + + # assert configured_url == expected_configured_url + + # def test_download_dataset_illegal_url(self): + # """ + # Test for exception when URL is bad + # """ + # with pytest.raises(GenAIPerfException): + # _ = LlmInputs._download_dataset( + # "https://bad-url.zzz", + # LlmInputs.DEFAULT_STARTING_INDEX, + # LlmInputs.DEFAULT_LENGTH, + # ) + + # def test_llm_inputs_error_in_server_response(self): + # """ + # Test for exception when length is out of range + # """ + # with pytest.raises(GenAIPerfException): + # _ = LlmInputs.create_llm_inputs( + # input_type=PromptSource.DATASET, + # dataset_name=OPEN_ORCA, + # output_format=OutputFormat.OPENAI_CHAT_COMPLETIONS, + # starting_index=LlmInputs.DEFAULT_STARTING_INDEX, + # length=int(LlmInputs.DEFAULT_LENGTH * 100), + # ) + + # def test_llm_inputs_with_defaults(self, default_configured_url): + # """ + # Test that default options work + # """ + # dataset = LlmInputs._download_dataset( + # default_configured_url, + # LlmInputs.DEFAULT_STARTING_INDEX, + # LlmInputs.DEFAULT_LENGTH, + # ) + # dataset_json = LlmInputs._convert_input_url_dataset_to_generic_json( + # dataset=dataset + # ) + + # assert dataset_json is not None + # assert len(dataset_json["rows"]) == LlmInputs.DEFAULT_LENGTH + + # def test_llm_inputs_with_non_default_length(self): + # """ + # Test that non-default length works + # """ + # configured_url = LlmInputs._create_configured_url( + # LlmInputs.OPEN_ORCA_URL, + # LlmInputs.DEFAULT_STARTING_INDEX, + # (int(LlmInputs.DEFAULT_LENGTH / 2)), + # ) + # dataset = LlmInputs._download_dataset( + # configured_url, + # LlmInputs.DEFAULT_STARTING_INDEX, + # length=(int(LlmInputs.DEFAULT_LENGTH / 2)), + # ) + # dataset_json = LlmInputs._convert_input_url_dataset_to_generic_json( + # dataset=dataset + # ) + + # assert dataset_json is not None + # assert len(dataset_json["rows"]) == LlmInputs.DEFAULT_LENGTH / 2 + + # def test_convert_default_json_to_pa_format(self, default_configured_url): + # """ + # Test that conversion to PA JSON format is correct + # """ + # dataset = LlmInputs._download_dataset( + # default_configured_url, + # LlmInputs.DEFAULT_STARTING_INDEX, + # LlmInputs.DEFAULT_LENGTH, + # ) + # dataset_json = LlmInputs._convert_input_url_dataset_to_generic_json( + # dataset=dataset + # ) + # pa_json = LlmInputs._convert_generic_json_to_output_format( + # 
output_format=OutputFormat.OPENAI_CHAT_COMPLETIONS, + # generic_dataset=dataset_json, + # add_model_name=False, + # add_stream=False, + # ) + + # assert pa_json is not None + # assert len(pa_json["data"]) == LlmInputs.DEFAULT_LENGTH + + # def test_create_openai_llm_inputs_cnn_dailymail(self): + # """ + # Test CNN_DAILYMAIL can be accessed + # """ + # pa_json = LlmInputs.create_llm_inputs( + # input_type=PromptSource.DATASET, + # dataset_name=CNN_DAILY_MAIL, + # output_format=OutputFormat.OPENAI_CHAT_COMPLETIONS, + # ) + + # os.remove(DEFAULT_INPUT_DATA_JSON) + + # assert pa_json is not None + # assert len(pa_json["data"]) == LlmInputs.DEFAULT_LENGTH + + # def test_write_to_file(self): + # """ + # Test that write to file is working correctly + # """ + # pa_json = LlmInputs.create_llm_inputs( + # input_type=PromptSource.DATASET, + # dataset_name=OPEN_ORCA, + # output_format=OutputFormat.OPENAI_CHAT_COMPLETIONS, + # model_name="open_orca", + # add_model_name=True, + # add_stream=True, + # ) + # try: + # f = open(DEFAULT_INPUT_DATA_JSON, "r") + # json_str = f.read() + # finally: + # f.close() + # os.remove(DEFAULT_INPUT_DATA_JSON) + + # assert pa_json == json.loads(json_str) + + # def test_create_openai_to_vllm(self): + # """ + # Test conversion of openai to vllm + # """ + # pa_json = LlmInputs.create_llm_inputs( + # input_type=PromptSource.DATASET, + # output_format=OutputFormat.VLLM, + # dataset_name=OPEN_ORCA, + # add_model_name=False, + # add_stream=True, + # ) + + # os.remove(DEFAULT_INPUT_DATA_JSON) + + # assert pa_json is not None + # assert len(pa_json["data"]) == LlmInputs.DEFAULT_LENGTH + + # def test_create_openai_to_completions(self): + # """ + # Test conversion of openai to completions + # """ + # pa_json = LlmInputs.create_llm_inputs( + # input_type=PromptSource.DATASET, + # output_format=OutputFormat.OPENAI_COMPLETIONS, + # dataset_name=OPEN_ORCA, + # add_model_name=False, + # add_stream=True, + # ) + + # os.remove(DEFAULT_INPUT_DATA_JSON) + + # assert pa_json is not None + # assert len(pa_json["data"]) == LlmInputs.DEFAULT_LENGTH + + # def test_create_openai_to_trtllm(self): + # """ + # Test conversion of openai to trtllm + # """ + # pa_json = LlmInputs.create_llm_inputs( + # input_type=PromptSource.DATASET, + # output_format=OutputFormat.TRTLLM, + # dataset_name=OPEN_ORCA, + # add_model_name=False, + # add_stream=True, + # ) + + # os.remove(DEFAULT_INPUT_DATA_JSON) + + # assert pa_json is not None + # assert len(pa_json["data"]) == LlmInputs.DEFAULT_LENGTH + + # def test_random_synthetic(self, default_tokenizer): + # """ + # Test that we can produce deterministic random synthetic prompts + # """ + # synthetic_prompt, synthetic_prompt_tokens = LlmInputs._create_synthetic_prompt( + # default_tokenizer, + # LlmInputs.DEFAULT_PROMPT_TOKENS_MEAN, + # LlmInputs.DEFAULT_PROMPT_TOKENS_STDDEV, + # LlmInputs.DEFAULT_REQUESTED_OUTPUT_TOKENS, + # LlmInputs.DEFAULT_RANDOM_SEED, + # ) + + # # 550 is the num of tokens returned for the default seed + # assert synthetic_prompt_tokens == 550 + + # synthetic_prompt, synthetic_prompt_tokens = LlmInputs._create_synthetic_prompt( + # default_tokenizer, + # LlmInputs.DEFAULT_PROMPT_TOKENS_MEAN, + # LlmInputs.DEFAULT_PROMPT_TOKENS_STDDEV + 250, + # LlmInputs.DEFAULT_REQUESTED_OUTPUT_TOKENS, + # LlmInputs.DEFAULT_RANDOM_SEED + 1, + # ) + # assert synthetic_prompt_tokens != 785 + + # def test_synthetic_to_vllm(self, default_tokenizer): + # """ + # Test generating synthetic prompts and converting to vllm + # """ + # pa_json = 
LlmInputs.create_llm_inputs( + # input_type=PromptSource.SYNTHETIC, + # output_format=OutputFormat.VLLM, + # num_of_output_prompts=5, + # add_model_name=False, + # add_stream=True, + # tokenizer=default_tokenizer, + # ) + + # os.remove(DEFAULT_INPUT_DATA_JSON) + + # assert pa_json is not None + # assert len(pa_json["data"]) == 5 + + # def test_synthetic_to_trtllm(self, default_tokenizer): + # """ + # Test generating synthetic prompts and converting to trtllm + # """ + # pa_json = LlmInputs.create_llm_inputs( + # input_type=PromptSource.SYNTHETIC, + # output_format=OutputFormat.TRTLLM, + # num_of_output_prompts=5, + # add_model_name=False, + # add_stream=True, + # tokenizer=default_tokenizer, + # ) + + # os.remove(DEFAULT_INPUT_DATA_JSON) + + # assert pa_json is not None + # assert len(pa_json["data"]) == 5 + + # def test_synthetic_to_openai_chat_completions(self, default_tokenizer): + # """ + # Test generating synthetic prompts and converting to OpenAI chat completions + # """ + # pa_json = LlmInputs.create_llm_inputs( + # input_type=PromptSource.SYNTHETIC, + # output_format=OutputFormat.OPENAI_CHAT_COMPLETIONS, + # num_of_output_prompts=5, + # add_model_name=False, + # add_stream=True, + # tokenizer=default_tokenizer, + # ) + + # os.remove(DEFAULT_INPUT_DATA_JSON) + + # assert pa_json is not None + # assert len(pa_json["data"]) == 5 + + # def test_synthetic_to_openai_completions(self, default_tokenizer): + # """ + # Test generating synthetic prompts and converting to OpenAI completions + # """ + # pa_json = LlmInputs.create_llm_inputs( + # input_type=PromptSource.SYNTHETIC, + # output_format=OutputFormat.OPENAI_COMPLETIONS, + # num_of_output_prompts=5, + # add_model_name=False, + # add_stream=True, + # tokenizer=default_tokenizer, + # ) + + # os.remove(DEFAULT_INPUT_DATA_JSON) + + # assert pa_json is not None + # assert len(pa_json["data"]) == 5 + + @pytest.mark.parametrize( + "output_format", + [ + (OutputFormat.OPENAI_CHAT_COMPLETIONS), + (OutputFormat.OPENAI_COMPLETIONS), + (OutputFormat.TRTLLM), + (OutputFormat.VLLM), + ], + ) + def test_llm_inputs_extra_inputs(self, default_tokenizer, output_format) -> None: + # Simulate --request-input arguments + request_inputs = {"additional_key": "additional_value"} + + # Generate input data with the additional request inputs pa_json = LlmInputs.create_llm_inputs( input_type=PromptSource.SYNTHETIC, - output_format=OutputFormat.OPENAI_CHAT_COMPLETIONS, - num_of_output_prompts=5, + output_format=output_format, + num_of_output_prompts=5, # Generate a small number of prompts for the test add_model_name=False, add_stream=True, tokenizer=default_tokenizer, + extra_inputs=request_inputs, # Pass the simulated --request-input arguments here ) - os.remove(DEFAULT_INPUT_DATA_JSON) - - assert pa_json is not None assert len(pa_json["data"]) == 5 - def test_synthetic_to_openai_completions(self, default_tokenizer): - """ - Test generating synthetic prompts and converting to OpenAI completions - """ - pa_json = LlmInputs.create_llm_inputs( - input_type=PromptSource.SYNTHETIC, - output_format=OutputFormat.OPENAI_COMPLETIONS, - num_of_output_prompts=5, - add_model_name=False, - add_stream=True, - tokenizer=default_tokenizer, - ) - - os.remove(DEFAULT_INPUT_DATA_JSON) - - assert pa_json is not None - assert len(pa_json["data"]) == 5 + if ( + output_format == OutputFormat.OPENAI_CHAT_COMPLETIONS + or output_format == OutputFormat.OPENAI_COMPLETIONS + ): + # Verify that each entry in the generated JSON includes the additional key-value pairs + for entry in 
pa_json.get("data", []): + assert "payload" in entry, "Payload is missing in the request" + payload = entry["payload"] + for item in payload: + assert ( + "additional_key" in item + ), "The additional_key is not present in the request" + assert ( + item["additional_key"] == "additional_value" + ), "The value of additional_key is incorrect" + elif output_format == OutputFormat.TRTLLM or output_format == OutputFormat.VLLM: + # Verify that each entry in the generated JSON includes the additional key-value pairs + for entry in pa_json.get("data", []): + assert ( + "additional_key" in entry + ), "The additional_key is not present in the request" + assert ( + entry["additional_key"] == "additional_value" + ), "The value of additional_key is incorrect" + else: + assert False, f"Unsupported output format: {output_format}" + + print( + "Test passed: --request-input key:value is correctly added to every request in input-data.json" + ) From 73191f110f7bfca0c1a22a3d473892d23156c7f6 Mon Sep 17 00:00:00 2001 From: David Yastremsky Date: Wed, 27 Mar 2024 10:29:41 -0700 Subject: [PATCH 03/14] Revert commenting current LLM input tests --- .../genai-perf/tests/test_llm_inputs.py | 640 +++++++++--------- 1 file changed, 320 insertions(+), 320 deletions(-) diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs.py b/src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs.py index 85b3f0e9d..7ca453117 100644 --- a/src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs.py +++ b/src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs.py @@ -38,326 +38,326 @@ def default_configured_url(self): def default_tokenizer(self): yield tokenizer.get_tokenizer(tokenizer.DEFAULT_TOKENIZER) - # def test_input_type_url_no_dataset_name(self): - # """ - # Test for exception when input type is URL and no dataset name - # """ - # with pytest.raises(GenAIPerfException): - # _ = LlmInputs._check_for_dataset_name_if_input_type_is_url( - # input_type=PromptSource.DATASET, dataset_name="" - # ) - - # def test_input_type_synthetic_no_tokenizer(self): - # """ - # Test for exception when input type is SYNTHETIC and no tokenizer - # """ - # with pytest.raises(GenAIPerfException): - # _ = LlmInputs._check_for_tokenzier_if_input_type_is_synthetic( - # input_type=PromptSource.SYNTHETIC, tokenizer=None - # ) - - # def test_illegal_starting_index(self): - # """ - # Test for exceptions when illegal values are given for starting index - # """ - # with pytest.raises(GenAIPerfException): - # _ = LlmInputs._check_for_valid_starting_index(starting_index="foo") - - # with pytest.raises(GenAIPerfException): - # _ = LlmInputs._check_for_valid_starting_index(starting_index=-1) - - # def test_illegal_length(self): - # """ - # Test for exceptions when illegal values are given for length - # """ - # with pytest.raises(GenAIPerfException): - # _ = LlmInputs._check_for_valid_length(length="foo") - - # with pytest.raises(GenAIPerfException): - # _ = LlmInputs._check_for_valid_length(length=0) - - # def test_create_configured_url(self): - # """ - # Test that we are appending and configuring the URL correctly - # """ - # expected_configured_url = ( - # "http://test-url.com" - # + f"&offset={LlmInputs.DEFAULT_STARTING_INDEX}" - # + f"&length={LlmInputs.DEFAULT_LENGTH}" - # ) - # configured_url = LlmInputs._create_configured_url( - # "http://test-url.com", - # LlmInputs.DEFAULT_STARTING_INDEX, - # LlmInputs.DEFAULT_LENGTH, - # ) - - # assert configured_url == expected_configured_url - - # def test_download_dataset_illegal_url(self): - # """ - # Test for 
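For anyone wanting to run just this new parametrized test, one possible invocation is sketched below, assuming pytest and the genai-perf test dependencies are installed, the working directory is the genai-perf package root, and the default tokenizer used by the fixture can be loaded.

    import pytest

    pytest.main(["tests/test_llm_inputs.py", "-k", "test_llm_inputs_extra_inputs", "-v"])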
exception when URL is bad - # """ - # with pytest.raises(GenAIPerfException): - # _ = LlmInputs._download_dataset( - # "https://bad-url.zzz", - # LlmInputs.DEFAULT_STARTING_INDEX, - # LlmInputs.DEFAULT_LENGTH, - # ) - - # def test_llm_inputs_error_in_server_response(self): - # """ - # Test for exception when length is out of range - # """ - # with pytest.raises(GenAIPerfException): - # _ = LlmInputs.create_llm_inputs( - # input_type=PromptSource.DATASET, - # dataset_name=OPEN_ORCA, - # output_format=OutputFormat.OPENAI_CHAT_COMPLETIONS, - # starting_index=LlmInputs.DEFAULT_STARTING_INDEX, - # length=int(LlmInputs.DEFAULT_LENGTH * 100), - # ) - - # def test_llm_inputs_with_defaults(self, default_configured_url): - # """ - # Test that default options work - # """ - # dataset = LlmInputs._download_dataset( - # default_configured_url, - # LlmInputs.DEFAULT_STARTING_INDEX, - # LlmInputs.DEFAULT_LENGTH, - # ) - # dataset_json = LlmInputs._convert_input_url_dataset_to_generic_json( - # dataset=dataset - # ) - - # assert dataset_json is not None - # assert len(dataset_json["rows"]) == LlmInputs.DEFAULT_LENGTH - - # def test_llm_inputs_with_non_default_length(self): - # """ - # Test that non-default length works - # """ - # configured_url = LlmInputs._create_configured_url( - # LlmInputs.OPEN_ORCA_URL, - # LlmInputs.DEFAULT_STARTING_INDEX, - # (int(LlmInputs.DEFAULT_LENGTH / 2)), - # ) - # dataset = LlmInputs._download_dataset( - # configured_url, - # LlmInputs.DEFAULT_STARTING_INDEX, - # length=(int(LlmInputs.DEFAULT_LENGTH / 2)), - # ) - # dataset_json = LlmInputs._convert_input_url_dataset_to_generic_json( - # dataset=dataset - # ) - - # assert dataset_json is not None - # assert len(dataset_json["rows"]) == LlmInputs.DEFAULT_LENGTH / 2 - - # def test_convert_default_json_to_pa_format(self, default_configured_url): - # """ - # Test that conversion to PA JSON format is correct - # """ - # dataset = LlmInputs._download_dataset( - # default_configured_url, - # LlmInputs.DEFAULT_STARTING_INDEX, - # LlmInputs.DEFAULT_LENGTH, - # ) - # dataset_json = LlmInputs._convert_input_url_dataset_to_generic_json( - # dataset=dataset - # ) - # pa_json = LlmInputs._convert_generic_json_to_output_format( - # output_format=OutputFormat.OPENAI_CHAT_COMPLETIONS, - # generic_dataset=dataset_json, - # add_model_name=False, - # add_stream=False, - # ) - - # assert pa_json is not None - # assert len(pa_json["data"]) == LlmInputs.DEFAULT_LENGTH - - # def test_create_openai_llm_inputs_cnn_dailymail(self): - # """ - # Test CNN_DAILYMAIL can be accessed - # """ - # pa_json = LlmInputs.create_llm_inputs( - # input_type=PromptSource.DATASET, - # dataset_name=CNN_DAILY_MAIL, - # output_format=OutputFormat.OPENAI_CHAT_COMPLETIONS, - # ) - - # os.remove(DEFAULT_INPUT_DATA_JSON) - - # assert pa_json is not None - # assert len(pa_json["data"]) == LlmInputs.DEFAULT_LENGTH - - # def test_write_to_file(self): - # """ - # Test that write to file is working correctly - # """ - # pa_json = LlmInputs.create_llm_inputs( - # input_type=PromptSource.DATASET, - # dataset_name=OPEN_ORCA, - # output_format=OutputFormat.OPENAI_CHAT_COMPLETIONS, - # model_name="open_orca", - # add_model_name=True, - # add_stream=True, - # ) - # try: - # f = open(DEFAULT_INPUT_DATA_JSON, "r") - # json_str = f.read() - # finally: - # f.close() - # os.remove(DEFAULT_INPUT_DATA_JSON) - - # assert pa_json == json.loads(json_str) - - # def test_create_openai_to_vllm(self): - # """ - # Test conversion of openai to vllm - # """ - # pa_json = LlmInputs.create_llm_inputs( 
- # input_type=PromptSource.DATASET, - # output_format=OutputFormat.VLLM, - # dataset_name=OPEN_ORCA, - # add_model_name=False, - # add_stream=True, - # ) - - # os.remove(DEFAULT_INPUT_DATA_JSON) - - # assert pa_json is not None - # assert len(pa_json["data"]) == LlmInputs.DEFAULT_LENGTH - - # def test_create_openai_to_completions(self): - # """ - # Test conversion of openai to completions - # """ - # pa_json = LlmInputs.create_llm_inputs( - # input_type=PromptSource.DATASET, - # output_format=OutputFormat.OPENAI_COMPLETIONS, - # dataset_name=OPEN_ORCA, - # add_model_name=False, - # add_stream=True, - # ) - - # os.remove(DEFAULT_INPUT_DATA_JSON) - - # assert pa_json is not None - # assert len(pa_json["data"]) == LlmInputs.DEFAULT_LENGTH - - # def test_create_openai_to_trtllm(self): - # """ - # Test conversion of openai to trtllm - # """ - # pa_json = LlmInputs.create_llm_inputs( - # input_type=PromptSource.DATASET, - # output_format=OutputFormat.TRTLLM, - # dataset_name=OPEN_ORCA, - # add_model_name=False, - # add_stream=True, - # ) - - # os.remove(DEFAULT_INPUT_DATA_JSON) - - # assert pa_json is not None - # assert len(pa_json["data"]) == LlmInputs.DEFAULT_LENGTH - - # def test_random_synthetic(self, default_tokenizer): - # """ - # Test that we can produce deterministic random synthetic prompts - # """ - # synthetic_prompt, synthetic_prompt_tokens = LlmInputs._create_synthetic_prompt( - # default_tokenizer, - # LlmInputs.DEFAULT_PROMPT_TOKENS_MEAN, - # LlmInputs.DEFAULT_PROMPT_TOKENS_STDDEV, - # LlmInputs.DEFAULT_REQUESTED_OUTPUT_TOKENS, - # LlmInputs.DEFAULT_RANDOM_SEED, - # ) - - # # 550 is the num of tokens returned for the default seed - # assert synthetic_prompt_tokens == 550 - - # synthetic_prompt, synthetic_prompt_tokens = LlmInputs._create_synthetic_prompt( - # default_tokenizer, - # LlmInputs.DEFAULT_PROMPT_TOKENS_MEAN, - # LlmInputs.DEFAULT_PROMPT_TOKENS_STDDEV + 250, - # LlmInputs.DEFAULT_REQUESTED_OUTPUT_TOKENS, - # LlmInputs.DEFAULT_RANDOM_SEED + 1, - # ) - # assert synthetic_prompt_tokens != 785 - - # def test_synthetic_to_vllm(self, default_tokenizer): - # """ - # Test generating synthetic prompts and converting to vllm - # """ - # pa_json = LlmInputs.create_llm_inputs( - # input_type=PromptSource.SYNTHETIC, - # output_format=OutputFormat.VLLM, - # num_of_output_prompts=5, - # add_model_name=False, - # add_stream=True, - # tokenizer=default_tokenizer, - # ) - - # os.remove(DEFAULT_INPUT_DATA_JSON) - - # assert pa_json is not None - # assert len(pa_json["data"]) == 5 - - # def test_synthetic_to_trtllm(self, default_tokenizer): - # """ - # Test generating synthetic prompts and converting to trtllm - # """ - # pa_json = LlmInputs.create_llm_inputs( - # input_type=PromptSource.SYNTHETIC, - # output_format=OutputFormat.TRTLLM, - # num_of_output_prompts=5, - # add_model_name=False, - # add_stream=True, - # tokenizer=default_tokenizer, - # ) - - # os.remove(DEFAULT_INPUT_DATA_JSON) - - # assert pa_json is not None - # assert len(pa_json["data"]) == 5 - - # def test_synthetic_to_openai_chat_completions(self, default_tokenizer): - # """ - # Test generating synthetic prompts and converting to OpenAI chat completions - # """ - # pa_json = LlmInputs.create_llm_inputs( - # input_type=PromptSource.SYNTHETIC, - # output_format=OutputFormat.OPENAI_CHAT_COMPLETIONS, - # num_of_output_prompts=5, - # add_model_name=False, - # add_stream=True, - # tokenizer=default_tokenizer, - # ) - - # os.remove(DEFAULT_INPUT_DATA_JSON) - - # assert pa_json is not None - # assert len(pa_json["data"]) == 5 - 
- # def test_synthetic_to_openai_completions(self, default_tokenizer): - # """ - # Test generating synthetic prompts and converting to OpenAI completions - # """ - # pa_json = LlmInputs.create_llm_inputs( - # input_type=PromptSource.SYNTHETIC, - # output_format=OutputFormat.OPENAI_COMPLETIONS, - # num_of_output_prompts=5, - # add_model_name=False, - # add_stream=True, - # tokenizer=default_tokenizer, - # ) - - # os.remove(DEFAULT_INPUT_DATA_JSON) - - # assert pa_json is not None - # assert len(pa_json["data"]) == 5 + def test_input_type_url_no_dataset_name(self): + """ + Test for exception when input type is URL and no dataset name + """ + with pytest.raises(GenAIPerfException): + _ = LlmInputs._check_for_dataset_name_if_input_type_is_url( + input_type=PromptSource.DATASET, dataset_name="" + ) + + def test_input_type_synthetic_no_tokenizer(self): + """ + Test for exception when input type is SYNTHETIC and no tokenizer + """ + with pytest.raises(GenAIPerfException): + _ = LlmInputs._check_for_tokenzier_if_input_type_is_synthetic( + input_type=PromptSource.SYNTHETIC, tokenizer=None + ) + + def test_illegal_starting_index(self): + """ + Test for exceptions when illegal values are given for starting index + """ + with pytest.raises(GenAIPerfException): + _ = LlmInputs._check_for_valid_starting_index(starting_index="foo") + + with pytest.raises(GenAIPerfException): + _ = LlmInputs._check_for_valid_starting_index(starting_index=-1) + + def test_illegal_length(self): + """ + Test for exceptions when illegal values are given for length + """ + with pytest.raises(GenAIPerfException): + _ = LlmInputs._check_for_valid_length(length="foo") + + with pytest.raises(GenAIPerfException): + _ = LlmInputs._check_for_valid_length(length=0) + + def test_create_configured_url(self): + """ + Test that we are appending and configuring the URL correctly + """ + expected_configured_url = ( + "http://test-url.com" + + f"&offset={LlmInputs.DEFAULT_STARTING_INDEX}" + + f"&length={LlmInputs.DEFAULT_LENGTH}" + ) + configured_url = LlmInputs._create_configured_url( + "http://test-url.com", + LlmInputs.DEFAULT_STARTING_INDEX, + LlmInputs.DEFAULT_LENGTH, + ) + + assert configured_url == expected_configured_url + + def test_download_dataset_illegal_url(self): + """ + Test for exception when URL is bad + """ + with pytest.raises(GenAIPerfException): + _ = LlmInputs._download_dataset( + "https://bad-url.zzz", + LlmInputs.DEFAULT_STARTING_INDEX, + LlmInputs.DEFAULT_LENGTH, + ) + + def test_llm_inputs_error_in_server_response(self): + """ + Test for exception when length is out of range + """ + with pytest.raises(GenAIPerfException): + _ = LlmInputs.create_llm_inputs( + input_type=PromptSource.DATASET, + dataset_name=OPEN_ORCA, + output_format=OutputFormat.OPENAI_CHAT_COMPLETIONS, + starting_index=LlmInputs.DEFAULT_STARTING_INDEX, + length=int(LlmInputs.DEFAULT_LENGTH * 100), + ) + + def test_llm_inputs_with_defaults(self, default_configured_url): + """ + Test that default options work + """ + dataset = LlmInputs._download_dataset( + default_configured_url, + LlmInputs.DEFAULT_STARTING_INDEX, + LlmInputs.DEFAULT_LENGTH, + ) + dataset_json = LlmInputs._convert_input_url_dataset_to_generic_json( + dataset=dataset + ) + + assert dataset_json is not None + assert len(dataset_json["rows"]) == LlmInputs.DEFAULT_LENGTH + + def test_llm_inputs_with_non_default_length(self): + """ + Test that non-default length works + """ + configured_url = LlmInputs._create_configured_url( + LlmInputs.OPEN_ORCA_URL, + LlmInputs.DEFAULT_STARTING_INDEX, 
+ (int(LlmInputs.DEFAULT_LENGTH / 2)), + ) + dataset = LlmInputs._download_dataset( + configured_url, + LlmInputs.DEFAULT_STARTING_INDEX, + length=(int(LlmInputs.DEFAULT_LENGTH / 2)), + ) + dataset_json = LlmInputs._convert_input_url_dataset_to_generic_json( + dataset=dataset + ) + + assert dataset_json is not None + assert len(dataset_json["rows"]) == LlmInputs.DEFAULT_LENGTH / 2 + + def test_convert_default_json_to_pa_format(self, default_configured_url): + """ + Test that conversion to PA JSON format is correct + """ + dataset = LlmInputs._download_dataset( + default_configured_url, + LlmInputs.DEFAULT_STARTING_INDEX, + LlmInputs.DEFAULT_LENGTH, + ) + dataset_json = LlmInputs._convert_input_url_dataset_to_generic_json( + dataset=dataset + ) + pa_json = LlmInputs._convert_generic_json_to_output_format( + output_format=OutputFormat.OPENAI_CHAT_COMPLETIONS, + generic_dataset=dataset_json, + add_model_name=False, + add_stream=False, + ) + + assert pa_json is not None + assert len(pa_json["data"]) == LlmInputs.DEFAULT_LENGTH + + def test_create_openai_llm_inputs_cnn_dailymail(self): + """ + Test CNN_DAILYMAIL can be accessed + """ + pa_json = LlmInputs.create_llm_inputs( + input_type=PromptSource.DATASET, + dataset_name=CNN_DAILY_MAIL, + output_format=OutputFormat.OPENAI_CHAT_COMPLETIONS, + ) + + os.remove(DEFAULT_INPUT_DATA_JSON) + + assert pa_json is not None + assert len(pa_json["data"]) == LlmInputs.DEFAULT_LENGTH + + def test_write_to_file(self): + """ + Test that write to file is working correctly + """ + pa_json = LlmInputs.create_llm_inputs( + input_type=PromptSource.DATASET, + dataset_name=OPEN_ORCA, + output_format=OutputFormat.OPENAI_CHAT_COMPLETIONS, + model_name="open_orca", + add_model_name=True, + add_stream=True, + ) + try: + f = open(DEFAULT_INPUT_DATA_JSON, "r") + json_str = f.read() + finally: + f.close() + os.remove(DEFAULT_INPUT_DATA_JSON) + + assert pa_json == json.loads(json_str) + + def test_create_openai_to_vllm(self): + """ + Test conversion of openai to vllm + """ + pa_json = LlmInputs.create_llm_inputs( + input_type=PromptSource.DATASET, + output_format=OutputFormat.VLLM, + dataset_name=OPEN_ORCA, + add_model_name=False, + add_stream=True, + ) + + os.remove(DEFAULT_INPUT_DATA_JSON) + + assert pa_json is not None + assert len(pa_json["data"]) == LlmInputs.DEFAULT_LENGTH + + def test_create_openai_to_completions(self): + """ + Test conversion of openai to completions + """ + pa_json = LlmInputs.create_llm_inputs( + input_type=PromptSource.DATASET, + output_format=OutputFormat.OPENAI_COMPLETIONS, + dataset_name=OPEN_ORCA, + add_model_name=False, + add_stream=True, + ) + + os.remove(DEFAULT_INPUT_DATA_JSON) + + assert pa_json is not None + assert len(pa_json["data"]) == LlmInputs.DEFAULT_LENGTH + + def test_create_openai_to_trtllm(self): + """ + Test conversion of openai to trtllm + """ + pa_json = LlmInputs.create_llm_inputs( + input_type=PromptSource.DATASET, + output_format=OutputFormat.TRTLLM, + dataset_name=OPEN_ORCA, + add_model_name=False, + add_stream=True, + ) + + os.remove(DEFAULT_INPUT_DATA_JSON) + + assert pa_json is not None + assert len(pa_json["data"]) == LlmInputs.DEFAULT_LENGTH + + def test_random_synthetic(self, default_tokenizer): + """ + Test that we can produce deterministic random synthetic prompts + """ + synthetic_prompt, synthetic_prompt_tokens = LlmInputs._create_synthetic_prompt( + default_tokenizer, + LlmInputs.DEFAULT_PROMPT_TOKENS_MEAN, + LlmInputs.DEFAULT_PROMPT_TOKENS_STDDEV, + LlmInputs.DEFAULT_REQUESTED_OUTPUT_TOKENS, + 
LlmInputs.DEFAULT_RANDOM_SEED, + ) + + # 550 is the num of tokens returned for the default seed + assert synthetic_prompt_tokens == 550 + + synthetic_prompt, synthetic_prompt_tokens = LlmInputs._create_synthetic_prompt( + default_tokenizer, + LlmInputs.DEFAULT_PROMPT_TOKENS_MEAN, + LlmInputs.DEFAULT_PROMPT_TOKENS_STDDEV + 250, + LlmInputs.DEFAULT_REQUESTED_OUTPUT_TOKENS, + LlmInputs.DEFAULT_RANDOM_SEED + 1, + ) + assert synthetic_prompt_tokens != 785 + + def test_synthetic_to_vllm(self, default_tokenizer): + """ + Test generating synthetic prompts and converting to vllm + """ + pa_json = LlmInputs.create_llm_inputs( + input_type=PromptSource.SYNTHETIC, + output_format=OutputFormat.VLLM, + num_of_output_prompts=5, + add_model_name=False, + add_stream=True, + tokenizer=default_tokenizer, + ) + + os.remove(DEFAULT_INPUT_DATA_JSON) + + assert pa_json is not None + assert len(pa_json["data"]) == 5 + + def test_synthetic_to_trtllm(self, default_tokenizer): + """ + Test generating synthetic prompts and converting to trtllm + """ + pa_json = LlmInputs.create_llm_inputs( + input_type=PromptSource.SYNTHETIC, + output_format=OutputFormat.TRTLLM, + num_of_output_prompts=5, + add_model_name=False, + add_stream=True, + tokenizer=default_tokenizer, + ) + + os.remove(DEFAULT_INPUT_DATA_JSON) + + assert pa_json is not None + assert len(pa_json["data"]) == 5 + + def test_synthetic_to_openai_chat_completions(self, default_tokenizer): + """ + Test generating synthetic prompts and converting to OpenAI chat completions + """ + pa_json = LlmInputs.create_llm_inputs( + input_type=PromptSource.SYNTHETIC, + output_format=OutputFormat.OPENAI_CHAT_COMPLETIONS, + num_of_output_prompts=5, + add_model_name=False, + add_stream=True, + tokenizer=default_tokenizer, + ) + + os.remove(DEFAULT_INPUT_DATA_JSON) + + assert pa_json is not None + assert len(pa_json["data"]) == 5 + + def test_synthetic_to_openai_completions(self, default_tokenizer): + """ + Test generating synthetic prompts and converting to OpenAI completions + """ + pa_json = LlmInputs.create_llm_inputs( + input_type=PromptSource.SYNTHETIC, + output_format=OutputFormat.OPENAI_COMPLETIONS, + num_of_output_prompts=5, + add_model_name=False, + add_stream=True, + tokenizer=default_tokenizer, + ) + + os.remove(DEFAULT_INPUT_DATA_JSON) + + assert pa_json is not None + assert len(pa_json["data"]) == 5 @pytest.mark.parametrize( "output_format", From 0776d6befe6749cb73eb06c361e3ebf079b07a73 Mon Sep 17 00:00:00 2001 From: David Yastremsky Date: Wed, 27 Mar 2024 11:48:27 -0700 Subject: [PATCH 04/14] Add comma between args --- src/c++/perf_analyzer/genai-perf/genai_perf/wrapper.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/wrapper.py b/src/c++/perf_analyzer/genai-perf/genai_perf/wrapper.py index e1c5d2536..1e1533ecd 100644 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/wrapper.py +++ b/src/c++/perf_analyzer/genai-perf/genai_perf/wrapper.py @@ -57,7 +57,8 @@ def build_cmd(args, extra_args): "input_format", "model", "backend", - "extra_inputs" "output_format", + "extra_inputs", + "output_format", # The 'streaming' passed in to this script is to determine if the # LLM response should be streaming. 
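The comma added above matters more than it looks: without it, Python's implicit concatenation of adjacent string literals fuses "extra_inputs" "output_format" into the single entry "extra_inputsoutput_format", so neither argument is actually skipped. A minimal reproduction:

    skip_args = [
        "model",
        "backend",
        "extra_inputs" "output_format",  # adjacent literals merge silently
    ]
    assert "extra_inputsoutput_format" in skip_args
    assert "extra_inputs" not in skip_args
    assert "output_format" not in skip_args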
That is different than the # 'streaming' that PA takes, which means something else (and is From 8cea6efb05a7c1f34789a4a799dae85fad8b4cc5 Mon Sep 17 00:00:00 2001 From: David Yastremsky Date: Wed, 27 Mar 2024 13:09:12 -0700 Subject: [PATCH 05/14] Change arg name, add parser test --- .../genai-perf/genai_perf/parser.py | 21 +++++++++---------- .../genai-perf/tests/test_cli.py | 13 ++++++++++++ 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py b/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py index 4225b3b95..533d2d581 100644 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py +++ b/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py @@ -147,6 +147,13 @@ def _add_input_args(parser): help="The seed used to generate random values.", ) + parser.add_argument( + "--synthetic-extra-inputs", + action="append", + help="Provide additional inputs to include with every request when prompt-source is synthetic. " + "You can repeat this flag for multiple inputs. Inputs should be in a key:value format.", + ) + input_group.add_argument( "--synthetic-requested-output-tokens", type=int, @@ -171,14 +178,6 @@ def _add_input_args(parser): help=f"The standard deviation of number of tokens in the generated prompts when prompt-source is synthetic.", ) - parser.add_argument( - "--extra-inputs", - action="append", - help="Provide additional inputs to include with every request. " - "You can repeat this flag for multiple inputs. " - "Inputs should be in a key:value format.", - ) - def _add_profile_args(parser): profile_group = parser.add_argument_group("Profiling") @@ -328,14 +327,14 @@ def _add_other_args(parser): def get_extra_inputs_as_dict(args: argparse.ArgumentParser) -> dict: request_inputs = {} - if hasattr(args, "extra_inputs"): - for input_str in args.extra_inputs: + if hasattr(args, "synthetic_extra_inputs"): + for input_str in args.synthetic_extra_inputs: try: key, value = input_str.split(":", 1) request_inputs[key] = value except ValueError: args.error( - f"Invalid input format for --extra-inputs: {input_str}" + f"Invalid input format for --synthetic--extra-inputs: {input_str}" "Expected input format: 'key:value'" ) return request_inputs diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_cli.py b/src/c++/perf_analyzer/genai-perf/tests/test_cli.py index d4f4bb831..79339f416 100644 --- a/src/c++/perf_analyzer/genai-perf/tests/test_cli.py +++ b/src/c++/perf_analyzer/genai-perf/tests/test_cli.py @@ -75,6 +75,19 @@ def test_help_version_arguments_output_and_exit( ["--endpoint", "v1/chat/completions"], {"endpoint": "v1/chat/completions"}, ), + ( + ["--synthetic-extra-inputs", "random_key:random_value"], + {"synthetic_extra_inputs": ["random_key:random_value"]}, + ), + ( + [ + "--synthetic-extra-inputs", + "random_key:5", + "--synthetic-extra-inputs", + "another_random_key:6", + ], + {"synthetic_extra_inputs": ["random_key:5", "another_random_key:6"]}, + ), ( ["--synthetic-requested-output-tokens", "5"], {"synthetic_requested_output_tokens": 5}, From 7aad03e57167a5da83550f18d69aa4790028c575 Mon Sep 17 00:00:00 2001 From: David Yastremsky Date: Wed, 27 Mar 2024 15:09:30 -0700 Subject: [PATCH 06/14] Add extra error checking and tests --- .../genai-perf/genai_perf/main.py | 7 ++- .../genai-perf/genai_perf/parser.py | 32 ++++++++----- .../genai-perf/tests/test_cli.py | 47 +++++++++++++++---- .../genai-perf/tests/test_llm_inputs.py | 7 +-- 4 files changed, 66 insertions(+), 27 deletions(-) diff --git 
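A note on the args.error(...) call shown in the context above: at runtime this function receives the Namespace returned by parse_args(), and Namespace has no error() method (error() is defined on ArgumentParser), so that branch would raise AttributeError instead of printing a usage message. The next commit in the series sidesteps this by raising ValueError directly. A quick check:

    import argparse

    ns = argparse.Namespace(extra_inputs=["missing-colon"])
    print(hasattr(ns, "error"))                          # False
    print(hasattr(argparse.ArgumentParser(), "error"))   # True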
a/src/c++/perf_analyzer/genai-perf/genai_perf/main.py b/src/c++/perf_analyzer/genai-perf/genai_perf/main.py index 8efdbc0ec..b86ad8845 100755 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/main.py +++ b/src/c++/perf_analyzer/genai-perf/genai_perf/main.py @@ -45,6 +45,11 @@ def generate_inputs(args: ArgumentParser, tokenizer: AutoTokenizer) -> None: input_file_name = "" # TODO (TMA-1759): review if add_model_name is always true add_model_name = True + try: + extra_input_dict = parser.get_extra_inputs_as_dict(args) + except Exception as e: + raise GenAIPerfException(e) + LlmInputs.create_llm_inputs( input_type=args.prompt_source, output_format=args.output_format, @@ -61,7 +66,7 @@ def generate_inputs(args: ArgumentParser, tokenizer: AutoTokenizer) -> None: add_model_name=add_model_name, add_stream=args.streaming, tokenizer=tokenizer, - extra_inputs=parser.get_extra_inputs_as_dict(args), + extra_inputs=extra_input_dict, ) diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py b/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py index 533d2d581..fedca828c 100644 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py +++ b/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py @@ -113,6 +113,13 @@ def handler(args, extra_args): def _add_input_args(parser): input_group = parser.add_argument_group("Input") + parser.add_argument( + "--extra-inputs", + action="append", + help="Provide additional inputs to include with every request. " + "You can repeat this flag for multiple inputs. Inputs should be in a key:value format.", + ) + input_group.add_argument( "--input-dataset", type=str.lower, @@ -147,13 +154,6 @@ def _add_input_args(parser): help="The seed used to generate random values.", ) - parser.add_argument( - "--synthetic-extra-inputs", - action="append", - help="Provide additional inputs to include with every request when prompt-source is synthetic. " - "You can repeat this flag for multiple inputs. 
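An end-to-end sketch of the happy path, mirroring the CLI tests updated later in this series: parse a simulated command line, then convert the repeated flag into the dictionary that generate_inputs() passes to create_llm_inputs(). This assumes it is run somewhere the genai_perf package is importable.

    import sys

    from genai_perf import parser

    sys.argv = [
        "genai-perf", "-m", "test_model",
        "--extra-inputs", "test_key:test_value",
        "--extra-inputs", "another_test_key:6",
    ]
    args, _ = parser.parse_args()
    print(parser.get_extra_inputs_as_dict(args))
    # {'test_key': 'test_value', 'another_test_key': '6'}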
Inputs should be in a key:value format.", - ) - input_group.add_argument( "--synthetic-requested-output-tokens", type=int, @@ -327,16 +327,24 @@ def _add_other_args(parser): def get_extra_inputs_as_dict(args: argparse.ArgumentParser) -> dict: request_inputs = {} - if hasattr(args, "synthetic_extra_inputs"): - for input_str in args.synthetic_extra_inputs: + if hasattr(args, "extra_inputs"): + key = "" + for input_str in args.extra_inputs: try: key, value = input_str.split(":", 1) - request_inputs[key] = value + if not key or not value: + raise ValueError("Key or value is empty") except ValueError: - args.error( - f"Invalid input format for --synthetic--extra-inputs: {input_str}" + raise ValueError( + f"Invalid input format for --extra-inputs: {input_str}\n" "Expected input format: 'key:value'" ) + if key in request_inputs: + raise ValueError( + f"Key already exists in request_inputs dictionary: {key}" + ) + request_inputs[key] = value + return request_inputs diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_cli.py b/src/c++/perf_analyzer/genai-perf/tests/test_cli.py index 79339f416..38fba8f08 100644 --- a/src/c++/perf_analyzer/genai-perf/tests/test_cli.py +++ b/src/c++/perf_analyzer/genai-perf/tests/test_cli.py @@ -29,7 +29,6 @@ import genai_perf.utils as utils import pytest from genai_perf import __version__, parser -from genai_perf.exceptions import GenAIPerfException from genai_perf.llm_inputs.llm_inputs import OutputFormat, PromptSource from genai_perf.main import run @@ -76,17 +75,17 @@ def test_help_version_arguments_output_and_exit( {"endpoint": "v1/chat/completions"}, ), ( - ["--synthetic-extra-inputs", "random_key:random_value"], - {"synthetic_extra_inputs": ["random_key:random_value"]}, + ["--extra-inputs", "test_key:test_value"], + {"extra_inputs": ["test_key:test_value"]}, ), ( [ - "--synthetic-extra-inputs", - "random_key:5", - "--synthetic-extra-inputs", - "another_random_key:6", + "--extra-inputs", + "test_key:5", + "--extra-inputs", + "another_test_key:6", ], - {"synthetic_extra_inputs": ["random_key:5", "another_random_key:6"]}, + {"extra_inputs": ["test_key:5", "another_test_key:6"]}, ), ( ["--synthetic-requested-output-tokens", "5"], @@ -233,3 +232,35 @@ def test_inferred_output_format(self, monkeypatch, args, expected_format): parsed_args, _ = parser.parse_args() assert parsed_args.output_format == expected_format + + @pytest.mark.parametrize( + "args, expected_error", + [ + ( + ["--extra-inputs", "hi:"], + "Invalid input format for --extra-inputs: hi:\nExpected input format: 'key:value'", + ), + ( + ["--extra-inputs", ":a"], + "Invalid input format for --extra-inputs: :a\nExpected input format: 'key:value'", + ), + ( + ["--extra-inputs", "unknown"], + "Invalid input format for --extra-inputs: unknown\nExpected input format: 'key:value'", + ), + ( + ["--extra-inputs", "test_key:5", "--extra-inputs", "test_key:6"], + "Key already exists in request_inputs dictionary: test_key", + ), + ], + ) + def test_repeated_extra_arg_warning(self, monkeypatch, args, expected_error): + combined_args = ["genai-perf", "-m", "test_model"] + args + monkeypatch.setattr("sys.argv", combined_args) + + parsed_args, _ = parser.parse_args() + + with pytest.raises(ValueError) as exc_info: + _ = parser.get_extra_inputs_as_dict(parsed_args) + + assert str(exc_info.value) == expected_error diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs.py b/src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs.py index 7ca453117..cda457a70 100644 --- 
a/src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs.py +++ b/src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs.py @@ -385,11 +385,11 @@ def test_llm_inputs_extra_inputs(self, default_tokenizer, output_format) -> None assert len(pa_json["data"]) == 5 + # Verify that each entry in the generated JSON includes the additional key-value pairs if ( output_format == OutputFormat.OPENAI_CHAT_COMPLETIONS or output_format == OutputFormat.OPENAI_COMPLETIONS ): - # Verify that each entry in the generated JSON includes the additional key-value pairs for entry in pa_json.get("data", []): assert "payload" in entry, "Payload is missing in the request" payload = entry["payload"] @@ -401,7 +401,6 @@ def test_llm_inputs_extra_inputs(self, default_tokenizer, output_format) -> None item["additional_key"] == "additional_value" ), "The value of additional_key is incorrect" elif output_format == OutputFormat.TRTLLM or output_format == OutputFormat.VLLM: - # Verify that each entry in the generated JSON includes the additional key-value pairs for entry in pa_json.get("data", []): assert ( "additional_key" in entry @@ -411,7 +410,3 @@ def test_llm_inputs_extra_inputs(self, default_tokenizer, output_format) -> None ), "The value of additional_key is incorrect" else: assert False, f"Unsupported output format: {output_format}" - - print( - "Test passed: --request-input key:value is correctly added to every request in input-data.json" - ) From b414590d5abe26c79a9708023aaf6cc162941460 Mon Sep 17 00:00:00 2001 From: David Yastremsky Date: Wed, 27 Mar 2024 15:11:02 -0700 Subject: [PATCH 07/14] Make caught exception more specific. --- src/c++/perf_analyzer/genai-perf/genai_perf/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/main.py b/src/c++/perf_analyzer/genai-perf/genai_perf/main.py index b86ad8845..312190092 100755 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/main.py +++ b/src/c++/perf_analyzer/genai-perf/genai_perf/main.py @@ -47,7 +47,7 @@ def generate_inputs(args: ArgumentParser, tokenizer: AutoTokenizer) -> None: add_model_name = True try: extra_input_dict = parser.get_extra_inputs_as_dict(args) - except Exception as e: + except ValueError as e: raise GenAIPerfException(e) LlmInputs.create_llm_inputs( From 4f0761202843f0246d013f6f576cd336a5a4dd4e Mon Sep 17 00:00:00 2001 From: David Yastremsky Date: Tue, 2 Apr 2024 16:49:06 -0700 Subject: [PATCH 08/14] Fix inputs, change warning to error --- .../genai_perf/llm_inputs/llm_inputs.py | 4 +-- .../genai-perf/genai_perf/parser.py | 27 ++++++++++++++----- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/llm_inputs.py b/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/llm_inputs.py index 2d648fb52..22b80b01a 100644 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/llm_inputs.py +++ b/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/llm_inputs.py @@ -788,7 +788,7 @@ def _add_optional_tags_to_vllm_json( if add_stream: pa_json["data"][index]["stream"] = [True] for key, value in extra_inputs.items(): - pa_json["data"][index][key] = value + pa_json["data"][index][key] = [value] return pa_json @@ -807,7 +807,7 @@ def _add_optional_tags_to_trtllm_json( if add_stream: pa_json["data"][index]["stream"] = [True] for key, value in extra_inputs.items(): - pa_json["data"][index][key] = value + pa_json["data"][index][key] = [value] return pa_json diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py 
b/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py index fedca828c..5a0e0f56a 100644 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py +++ b/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py @@ -55,8 +55,8 @@ def _check_conditional_args( elif args.endpoint == "v1/completions": args.output_format = OutputFormat.OPENAI_COMPLETIONS elif args.endpoint is not None: - logger.warning( - "The --endpoint option is ignored when not using the 'openai' service-kind." + parser.error( + "The --endpoint option should only be used when using the 'openai' service-kind." ) if args.service_kind == "triton": args = _convert_str_to_enum_entry(args, "backend", OutputFormat) @@ -113,7 +113,7 @@ def handler(args, extra_args): def _add_input_args(parser): input_group = parser.add_argument_group("Input") - parser.add_argument( + input_group.add_argument( "--extra-inputs", action="append", help="Provide additional inputs to include with every request. " @@ -249,9 +249,8 @@ def _add_endpoint_args(parser): type=str, choices=["v1/chat/completions", "v1/completions"], required=False, - help="The endpoint to send requests to on the " - 'server. This is required when using the "openai" service-kind. ' - "This is ignored in other cases.", + help=f"The endpoint to send requests to on the " + 'server. This is only used with the "openai" service-kind. ', ) endpoint_group.add_argument( @@ -339,6 +338,22 @@ def get_extra_inputs_as_dict(args: argparse.ArgumentParser) -> dict: f"Invalid input format for --extra-inputs: {input_str}\n" "Expected input format: 'key:value'" ) + + # Convert the value to a bool, int, or float if applicable + is_bool = value.lower() in ["true", "false"] + is_int = value.isdigit() + is_float = value.count(".") == 1 and ( + value[0] == "." or value.replace(".", "").isdigit() + ) + + # Convert value to bool, int, or float if applicable + if is_bool: + value = value.lower() == "true" + elif is_int: + value = int(value) + elif is_float: + value = float(value) + if key in request_inputs: raise ValueError( f"Key already exists in request_inputs dictionary: {key}" From cf9befab01676b9ab5fd6ca69e50225ac99cd564 Mon Sep 17 00:00:00 2001 From: David Yastremsky Date: Tue, 2 Apr 2024 17:11:44 -0700 Subject: [PATCH 09/14] Fix tests to use lists and have required args --- src/c++/perf_analyzer/genai-perf/tests/test_cli.py | 7 +++++-- src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs.py | 6 +++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_cli.py b/src/c++/perf_analyzer/genai-perf/tests/test_cli.py index 38fba8f08..fc0c942ec 100644 --- a/src/c++/perf_analyzer/genai-perf/tests/test_cli.py +++ b/src/c++/perf_analyzer/genai-perf/tests/test_cli.py @@ -69,9 +69,12 @@ def test_help_version_arguments_output_and_exit( "arg, expected_attributes", [ (["--concurrency", "3"], {"concurrency_range": "3"}), - (["--endpoint", "v1/completions"], {"endpoint": "v1/completions"}), ( - ["--endpoint", "v1/chat/completions"], + ["--endpoint", "v1/completions", "--service-kind", "openai"], + {"endpoint": "v1/completions"}, + ), + ( + ["--endpoint", "v1/chat/completions", "--service-kind", "openai"], {"endpoint": "v1/chat/completions"}, ), ( diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs.py b/src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs.py index cda457a70..bd3cd173b 100644 --- a/src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs.py +++ b/src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs.py @@ -405,8 +405,8 @@ def 
test_llm_inputs_extra_inputs(self, default_tokenizer, output_format) -> None assert ( "additional_key" in entry ), "The additional_key is not present in the request" - assert ( - entry["additional_key"] == "additional_value" - ), "The value of additional_key is incorrect" + assert entry["additional_key"] == [ + "additional_value" + ], "The value of additional_key is incorrect" else: assert False, f"Unsupported output format: {output_format}" From 06777487c93d761b842bd271b3776ea05c3cc8b7 Mon Sep 17 00:00:00 2001 From: David Yastremsky Date: Tue, 2 Apr 2024 18:25:42 -0700 Subject: [PATCH 10/14] Update docs, use input_name instead of key --- src/c++/perf_analyzer/genai-perf/README.md | 12 ++++++++++++ .../genai-perf/genai_perf/parser.py | 18 +++++++++--------- .../perf_analyzer/genai-perf/tests/test_cli.py | 8 ++++---- 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/src/c++/perf_analyzer/genai-perf/README.md b/src/c++/perf_analyzer/genai-perf/README.md index e6bdec495..986d20675 100644 --- a/src/c++/perf_analyzer/genai-perf/README.md +++ b/src/c++/perf_analyzer/genai-perf/README.md @@ -131,6 +131,11 @@ options: * `--num-prompts`: The number of unique prompts to generate. * `--dataset`: HuggingFace dataset to use for benchmarking. +You can optionally set additional model inputs with the following option: +* `--extra-inputs {input_name}:{value}`: An additional input for use with the model with a singular value, +such as `Stream:True` or `max_tokens:5`. This flag can be repeated to provide multiple inputs. + + # Metrics GenAI-Perf collects a diverse set of metrics that captures the performance of @@ -241,6 +246,13 @@ both infer per second and latency. Enables the use of the streaming API. +##### `--extra-inputs` + +Provides an additional input for use with the model with a singular value, +such as `Stream:True` or `max_tokens:5`. This flag can be repeated to provide multiple inputs. + + + ##### `--endpoint {v1/completions,v1/chat/completions}` Describes what endpoint to send requests to on the server. This is required when diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py b/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py index 5a0e0f56a..977bbe699 100644 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py +++ b/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py @@ -117,7 +117,7 @@ def _add_input_args(parser): "--extra-inputs", action="append", help="Provide additional inputs to include with every request. " - "You can repeat this flag for multiple inputs. Inputs should be in a key:value format.", + "You can repeat this flag for multiple inputs. 
Inputs should be in a input_name:value format.", ) input_group.add_argument( @@ -327,16 +327,16 @@ def _add_other_args(parser): def get_extra_inputs_as_dict(args: argparse.ArgumentParser) -> dict: request_inputs = {} if hasattr(args, "extra_inputs"): - key = "" + input_name = "" for input_str in args.extra_inputs: try: - key, value = input_str.split(":", 1) - if not key or not value: - raise ValueError("Key or value is empty") + input_name, value = input_str.split(":", 1) + if not input_name or not value: + raise ValueError("Input_name or value is empty") except ValueError: raise ValueError( f"Invalid input format for --extra-inputs: {input_str}\n" - "Expected input format: 'key:value'" + "Expected input format: 'input_name:value'" ) # Convert the value to a bool, int, or float if applicable @@ -354,11 +354,11 @@ def get_extra_inputs_as_dict(args: argparse.ArgumentParser) -> dict: elif is_float: value = float(value) - if key in request_inputs: + if input_name in request_inputs: raise ValueError( - f"Key already exists in request_inputs dictionary: {key}" + f"Input name already exists in request_inputs dictionary: {input_name}" ) - request_inputs[key] = value + request_inputs[input_name] = value return request_inputs diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_cli.py b/src/c++/perf_analyzer/genai-perf/tests/test_cli.py index fc0c942ec..b9a472a5a 100644 --- a/src/c++/perf_analyzer/genai-perf/tests/test_cli.py +++ b/src/c++/perf_analyzer/genai-perf/tests/test_cli.py @@ -241,19 +241,19 @@ def test_inferred_output_format(self, monkeypatch, args, expected_format): [ ( ["--extra-inputs", "hi:"], - "Invalid input format for --extra-inputs: hi:\nExpected input format: 'key:value'", + "Invalid input format for --extra-inputs: hi:\nExpected input format: 'input_name:value'", ), ( ["--extra-inputs", ":a"], - "Invalid input format for --extra-inputs: :a\nExpected input format: 'key:value'", + "Invalid input format for --extra-inputs: :a\nExpected input format: 'input_name:value'", ), ( ["--extra-inputs", "unknown"], - "Invalid input format for --extra-inputs: unknown\nExpected input format: 'key:value'", + "Invalid input format for --extra-inputs: unknown\nExpected input format: 'input_name:value'", ), ( ["--extra-inputs", "test_key:5", "--extra-inputs", "test_key:6"], - "Key already exists in request_inputs dictionary: test_key", + "Input name already exists in request_inputs dictionary: test_key", ), ], ) From 6217783d399e6c39e213ab5cb394b50c1f9920d6 Mon Sep 17 00:00:00 2001 From: David Yastremsky Date: Tue, 2 Apr 2024 18:28:09 -0700 Subject: [PATCH 11/14] Change wording --- src/c++/perf_analyzer/genai-perf/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/c++/perf_analyzer/genai-perf/README.md b/src/c++/perf_analyzer/genai-perf/README.md index 986d20675..ba7433247 100644 --- a/src/c++/perf_analyzer/genai-perf/README.md +++ b/src/c++/perf_analyzer/genai-perf/README.md @@ -133,7 +133,7 @@ options: You can optionally set additional model inputs with the following option: * `--extra-inputs {input_name}:{value}`: An additional input for use with the model with a singular value, -such as `Stream:True` or `max_tokens:5`. This flag can be repeated to provide multiple inputs. +such as `Stream:True` or `max_tokens:5`. This flag can be repeated to supply multiple extra inputs. # Metrics @@ -249,7 +249,7 @@ Enables the use of the streaming API. 
##### `--extra-inputs` Provides an additional input for use with the model with a singular value, -such as `Stream:True` or `max_tokens:5`. This flag can be repeated to provide multiple inputs. +such as `Stream:True` or `max_tokens:5`. This flag can be repeated to supply multiple extra inputs. From f02419ff3e3320a44114f0f6407ccb33d894a145 Mon Sep 17 00:00:00 2001 From: dyastremsky <58150256+dyastremsky@users.noreply.github.com> Date: Wed, 3 Apr 2024 09:22:31 -0700 Subject: [PATCH 12/14] Change Boolean example to lower case. Co-authored-by: Hyunjae Woo <107147848+nv-hwoo@users.noreply.github.com> --- src/c++/perf_analyzer/genai-perf/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/c++/perf_analyzer/genai-perf/README.md b/src/c++/perf_analyzer/genai-perf/README.md index ba7433247..ebfd7e49c 100644 --- a/src/c++/perf_analyzer/genai-perf/README.md +++ b/src/c++/perf_analyzer/genai-perf/README.md @@ -133,7 +133,7 @@ options: You can optionally set additional model inputs with the following option: * `--extra-inputs {input_name}:{value}`: An additional input for use with the model with a singular value, -such as `Stream:True` or `max_tokens:5`. This flag can be repeated to supply multiple extra inputs. +such as `stream:true` or `max_tokens:5`. This flag can be repeated to supply multiple extra inputs. # Metrics From 438606451735785815575fc3c37385b7ce19a4cc Mon Sep 17 00:00:00 2001 From: David Yastremsky Date: Wed, 3 Apr 2024 09:35:52 -0700 Subject: [PATCH 13/14] Update tests, get rid of try-catch --- src/c++/perf_analyzer/genai-perf/README.md | 2 +- .../genai-perf/genai_perf/parser.py | 19 ++++++++++--------- .../genai-perf/tests/test_cli.py | 8 ++++++-- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/src/c++/perf_analyzer/genai-perf/README.md b/src/c++/perf_analyzer/genai-perf/README.md index ebfd7e49c..ec77e1223 100644 --- a/src/c++/perf_analyzer/genai-perf/README.md +++ b/src/c++/perf_analyzer/genai-perf/README.md @@ -249,7 +249,7 @@ Enables the use of the streaming API. ##### `--extra-inputs` Provides an additional input for use with the model with a singular value, -such as `Stream:True` or `max_tokens:5`. This flag can be repeated to supply multiple extra inputs. +such as `stream:true` or `max_tokens:5`. This flag can be repeated to supply multiple extra inputs. diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py b/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py index 977bbe699..e9155a4d5 100644 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py +++ b/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py @@ -117,7 +117,7 @@ def _add_input_args(parser): "--extra-inputs", action="append", help="Provide additional inputs to include with every request. " - "You can repeat this flag for multiple inputs. Inputs should be in a input_name:value format.", + "You can repeat this flag for multiple inputs. 
Inputs should be in an input_name:value format.", ) input_group.add_argument( @@ -327,26 +327,27 @@ def _add_other_args(parser): def get_extra_inputs_as_dict(args: argparse.ArgumentParser) -> dict: request_inputs = {} if hasattr(args, "extra_inputs"): - input_name = "" for input_str in args.extra_inputs: - try: - input_name, value = input_str.split(":", 1) - if not input_name or not value: - raise ValueError("Input_name or value is empty") - except ValueError: + semicolon_count = input_str.count(":") + if semicolon_count != 1: raise ValueError( f"Invalid input format for --extra-inputs: {input_str}\n" "Expected input format: 'input_name:value'" ) + input_name, value = input_str.split(":", 1) + + if not input_name or not value: + raise ValueError( + f"Input name or value is empty in --extra-inputs: {input_str}\n" + "Expected input format: 'input_name:value'" + ) - # Convert the value to a bool, int, or float if applicable is_bool = value.lower() in ["true", "false"] is_int = value.isdigit() is_float = value.count(".") == 1 and ( value[0] == "." or value.replace(".", "").isdigit() ) - # Convert value to bool, int, or float if applicable if is_bool: value = value.lower() == "true" elif is_int: diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_cli.py b/src/c++/perf_analyzer/genai-perf/tests/test_cli.py index b9a472a5a..dd72fc22f 100644 --- a/src/c++/perf_analyzer/genai-perf/tests/test_cli.py +++ b/src/c++/perf_analyzer/genai-perf/tests/test_cli.py @@ -241,11 +241,15 @@ def test_inferred_output_format(self, monkeypatch, args, expected_format): [ ( ["--extra-inputs", "hi:"], - "Invalid input format for --extra-inputs: hi:\nExpected input format: 'input_name:value'", + "Input name or value is empty in --extra-inputs: hi:\nExpected input format: 'input_name:value'", ), ( ["--extra-inputs", ":a"], - "Invalid input format for --extra-inputs: :a\nExpected input format: 'input_name:value'", + "Input name or value is empty in --extra-inputs: :a\nExpected input format: 'input_name:value'", + ), + ( + ["--extra-inputs", ":a:"], + "Invalid input format for --extra-inputs: :a:\nExpected input format: 'input_name:value'", ), ( ["--extra-inputs", "unknown"], From f684dbf26002817ba2151c027d22550a97f9008a Mon Sep 17 00:00:00 2001 From: David Yastremsky Date: Wed, 3 Apr 2024 09:40:49 -0700 Subject: [PATCH 14/14] Remove extra comments, remove default value --- .../perf_analyzer/genai-perf/tests/test_llm_inputs.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs.py b/src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs.py index bd3cd173b..7e119ac44 100644 --- a/src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs.py +++ b/src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs.py @@ -369,28 +369,25 @@ def test_synthetic_to_openai_completions(self, default_tokenizer): ], ) def test_llm_inputs_extra_inputs(self, default_tokenizer, output_format) -> None: - # Simulate --request-input arguments request_inputs = {"additional_key": "additional_value"} - # Generate input data with the additional request inputs pa_json = LlmInputs.create_llm_inputs( input_type=PromptSource.SYNTHETIC, output_format=output_format, - num_of_output_prompts=5, # Generate a small number of prompts for the test + num_of_output_prompts=5, add_model_name=False, add_stream=True, tokenizer=default_tokenizer, - extra_inputs=request_inputs, # Pass the simulated --request-input arguments here + extra_inputs=request_inputs, ) assert len(pa_json["data"]) == 5 - 
# Verify that each entry in the generated JSON includes the additional key-value pairs if ( output_format == OutputFormat.OPENAI_CHAT_COMPLETIONS or output_format == OutputFormat.OPENAI_COMPLETIONS ): - for entry in pa_json.get("data", []): + for entry in pa_json["data"]: assert "payload" in entry, "Payload is missing in the request" payload = entry["payload"] for item in payload: @@ -401,7 +398,7 @@ def test_llm_inputs_extra_inputs(self, default_tokenizer, output_format) -> None item["additional_key"] == "additional_value" ), "The value of additional_key is incorrect" elif output_format == OutputFormat.TRTLLM or output_format == OutputFormat.VLLM: - for entry in pa_json.get("data", []): + for entry in pa_json["data"]: assert ( "additional_key" in entry ), "The additional_key is not present in the request"
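For reference, a minimal standalone sketch of the input_name:value parsing and type coercion that get_extra_inputs_as_dict converges on after patches 08 and 13. The helper name parse_extra_inputs and the plain-list argument are illustrative assumptions; the real function reads the repeated --extra-inputs values from the parsed argparse namespace.

    from typing import Any, Dict, List

    def parse_extra_inputs(extra_inputs: List[str]) -> Dict[str, Any]:
        """Parse repeated 'input_name:value' strings into a request-input dict."""
        request_inputs: Dict[str, Any] = {}
        for input_str in extra_inputs:
            # Exactly one colon is expected, e.g. "max_tokens:5".
            if input_str.count(":") != 1:
                raise ValueError(
                    f"Invalid input format for --extra-inputs: {input_str}\n"
                    "Expected input format: 'input_name:value'"
                )
            input_name, value = input_str.split(":", 1)
            if not input_name or not value:
                raise ValueError(
                    f"Input name or value is empty in --extra-inputs: {input_str}\n"
                    "Expected input format: 'input_name:value'"
                )

            # Coerce the string to bool, int, or float when it looks like one.
            parsed: Any = value
            if value.lower() in ("true", "false"):
                parsed = value.lower() == "true"
            elif value.isdigit():
                parsed = int(value)
            elif value.count(".") == 1 and (
                value[0] == "." or value.replace(".", "").isdigit()
            ):
                parsed = float(value)

            if input_name in request_inputs:
                raise ValueError(
                    f"Input name already exists in request_inputs dictionary: {input_name}"
                )
            request_inputs[input_name] = parsed
        return request_inputs

    if __name__ == "__main__":
        # e.g. genai-perf ... --extra-inputs max_tokens:5 --extra-inputs stream:true
        print(parse_extra_inputs(["max_tokens:5", "stream:true", "temperature:0.7"]))
        # -> {'max_tokens': 5, 'stream': True, 'temperature': 0.7}

As patches 06 and 07 set up, main.py wraps a ValueError from this parsing in a GenAIPerfException, so a malformed --extra-inputs value surfaces as a genai-perf error rather than a raw traceback.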
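A sketch of how the extra inputs land in the generated payloads, with illustrative helper names rather than the LlmInputs internals: OpenAI chat/completions request bodies take the values as plain key/value pairs, while vllm and trtllm entries wrap each value in a single-element list (the change patch 08 makes and patch 09's tests assert). Field names other than the extra inputs are placeholders.

    from typing import Any, Dict

    def add_extra_inputs_openai(payload: Dict[str, Any], extra_inputs: Dict[str, Any]) -> Dict[str, Any]:
        # OpenAI chat/completions bodies are plain JSON objects, so the
        # extra inputs merge in unchanged, e.g. "max_tokens": 5.
        payload.update(extra_inputs)
        return payload

    def add_extra_inputs_triton(entry: Dict[str, Any], extra_inputs: Dict[str, Any]) -> Dict[str, Any]:
        # vllm/trtllm entries in the Perf Analyzer input JSON carry
        # one-element lists per input, so each extra value gets wrapped.
        for name, value in extra_inputs.items():
            entry[name] = [value]
        return entry

    extra = {"additional_key": "additional_value"}
    print(add_extra_inputs_openai({"messages": [], "model": "test_model"}, extra))
    # {'messages': [], 'model': 'test_model', 'additional_key': 'additional_value'}
    print(add_extra_inputs_triton({"text_input": ["hello"], "stream": [True]}, extra))
    # {'text_input': ['hello'], 'stream': [True], 'additional_key': ['additional_value']}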
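A pytest-style sketch of the malformed-input cases that the new test_cli parametrization exercises, written against the illustrative parse_extra_inputs helper above rather than the real parser module.

    import pytest

    # Assumes the parse_extra_inputs sketch above is saved as extra_inputs_sketch.py.
    from extra_inputs_sketch import parse_extra_inputs

    @pytest.mark.parametrize(
        "raw, expected_error",
        [
            (["hi:"], "Input name or value is empty"),
            ([":a"], "Input name or value is empty"),
            ([":a:"], "Invalid input format"),
            (["unknown"], "Invalid input format"),
            (["test_key:5", "test_key:6"], "already exists"),
        ],
    )
    def test_extra_inputs_error_cases(raw, expected_error):
        # Each malformed or duplicated value should raise a ValueError whose
        # message matches the expected substring.
        with pytest.raises(ValueError, match=expected_error):
            parse_extra_inputs(raw)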