Skip to content

Commit 0565603

Browse files
AndyDai-nvlkomali
authored and committed
Support Goodput metric (#32)
GenAI-Perf goodput support implementation
1 parent f492c91 commit 0565603

23 files changed

+1262
-20
lines changed

genai-perf/genai_perf/export_data/console_exporter.py

+3
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,9 @@ def _construct_table(self, table: Table) -> None:
8484
for metric in self._metrics.system_metrics:
8585
metric_str = metric.name.replace("_", " ").capitalize()
8686
# metric_str = metric_str.replace("throughput", "tput")
87+
if metric.name == "request_goodput":
88+
if not self._args.goodput:
89+
continue
8790
metric_str += f" ({metric.unit})" if metric.unit != "tokens" else ""
8891
row_values = [metric_str]
8992
for stat in self.STAT_COLUMN_KEYS:

genai-perf/genai_perf/export_data/csv_exporter.py

+3
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,9 @@ def _write_system_metrics(self, csv_writer) -> None:
9494
for metric in self._metrics.system_metrics:
9595
metric_str = metric.name.replace("_", " ").title()
9696
metric_str += f" ({metric.unit})"
97+
if metric.name == "request_goodput":
98+
if not self._args.goodput:
99+
continue
97100
value = self._stats[metric.name]["avg"]
98101
csv_writer.writerow([metric_str, f"{value:.2f}"])
99102

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
#
3+
# Redistribution and use in source and binary forms, with or without
4+
# modification, are permitted provided that the following conditions
5+
# are met:
6+
# * Redistributions of source code must retain the above copyright
7+
# notice, this list of conditions and the following disclaimer.
8+
# * Redistributions in binary form must reproduce the above copyright
9+
# notice, this list of conditions and the following disclaimer in the
10+
# documentation and/or other materials provided with the distribution.
11+
# * Neither the name of NVIDIA CORPORATION nor the names of its
12+
# contributors may be used to endorse or promote products derived
13+
# from this software without specific prior written permission.
14+
#
15+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16+
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23+
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
27+
from genai_perf.goodput_calculator.goodput_calculator import GoodputCalculator
28+
from genai_perf.goodput_calculator.llm_goodput_calculator import LLMGoodputCalculator
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
#!/usr/bin/env python3
2+
3+
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4+
#
5+
# Redistribution and use in source and binary forms, with or without
6+
# modification, are permitted provided that the following conditions
7+
# are met:
8+
# * Redistributions of source code must retain the above copyright
9+
# notice, this list of conditions and the following disclaimer.
10+
# * Redistributions in binary form must reproduce the above copyright
11+
# notice, this list of conditions and the following disclaimer in the
12+
# documentation and/or other materials provided with the distribution.
13+
# * Neither the name of NVIDIA CORPORATION nor the names of its
14+
# contributors may be used to endorse or promote products derived
15+
# from this software without specific prior written permission.
16+
#
17+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
18+
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
21+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
25+
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28+
29+
30+
from abc import ABC, abstractmethod
31+
from typing import Dict, List, Optional
32+
33+
from genai_perf.metrics import Metrics
34+
35+
36+
class GoodputCalculator(ABC):
    """Abstract template for deriving goodput from goodput constraints.

    Concrete subclasses implement the four pipeline steps; ``compute``
    invokes them in a fixed order and the outcome is read back through
    the ``goodput`` property.
    """

    # Multiplier turning a millisecond quantity into nanoseconds.
    MS_TO_NS_CONVERSION = 1e6
    # Value meant to flag a goodput result as invalid (not consumed by
    # this base class itself).
    INVALID_GOODPUT = [-1.0]

    def __init__(
        self,
        goodput_constraints: Dict[str, float],
        metric: Metrics,
        benchmark_duration: float,
    ) -> None:
        """
        Store the calculator inputs and seed the singular-to-plural
        metric name table.

        Args:
            goodput_constraints: Constraint value keyed by metric name.
            metric: Metrics object the subclasses read values from.
            benchmark_duration: Duration of the benchmark run.
        """
        self._goodput_val: Optional[List[float]] = None
        self._metric = metric
        self._benchmark_duration = benchmark_duration
        self._goodput_constraints = goodput_constraints
        self._slo_names = {"request_latency": "request_latencies"}

    def compute(self) -> None:
        """
        Run the full goodput pipeline.

        Steps, in order: select the valid constraints from the user's
        input, gather the per-request metric values, count the good
        requests, and turn that count into the final goodput.
        """
        self._set_valid_slos()
        self._combine_requests_metric_values()
        self._compute_goodput(self._count_good_reqs())

    @abstractmethod
    def _set_valid_slos(self) -> None:
        """Keep the valid goodput constraints and log the invalid ones."""

    @abstractmethod
    def _combine_requests_metric_values(self) -> None:
        """
        Group, per request, the values of every metric that matches a
        valid goodput constraint.
        """

    @abstractmethod
    def _count_good_reqs(self) -> Optional[int]:
        """Return how many requests satisfy the goodput constraints."""

    @abstractmethod
    def _compute_goodput(self, good_count) -> None:
        """Derive the goodput from the number of good requests."""

    @property
    def goodput(self) -> Optional[List[float]]:
        """The stored goodput value; ``None`` until one has been set."""
        return self._goodput_val

    def get_slo_name(self, metric_name: str) -> str:
        """
        Look up the plural name registered for ``metric_name``.

        Raises:
            KeyError: If ``metric_name`` has no registered plural name.
        """
        if metric_name not in self._slo_names:
            raise KeyError(f"No metric named '{metric_name}' exists.")
        return self._slo_names[metric_name]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
#!/usr/bin/env python3
2+
3+
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4+
#
5+
# Redistribution and use in source and binary forms, with or without
6+
# modification, are permitted provided that the following conditions
7+
# are met:
8+
# * Redistributions of source code must retain the above copyright
9+
# notice, this list of conditions and the following disclaimer.
10+
# * Redistributions in binary form must reproduce the above copyright
11+
# notice, this list of conditions and the following disclaimer in the
12+
# documentation and/or other materials provided with the distribution.
13+
# * Neither the name of NVIDIA CORPORATION nor the names of its
14+
# contributors may be used to endorse or promote products derived
15+
# from this software without specific prior written permission.
16+
#
17+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
18+
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
21+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
25+
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28+
29+
from typing import Dict, List, Optional, Union
30+
31+
import genai_perf.logging as logging
32+
from genai_perf.goodput_calculator.goodput_calculator import GoodputCalculator
33+
from genai_perf.metrics.llm_metrics import LLMMetrics
34+
from genai_perf.metrics.metrics import Metrics
35+
36+
logger = logging.getLogger(__name__)
37+
38+
39+
class LLMGoodputCalculator(GoodputCalculator):
    """
    A subclass to calculate goodput for LLMs according to
    LLM-related goodput constraints.

    Constraints are split into two groups by metric name:

    * time-related: a request passes when its value does not exceed
      the target;
    * throughput-related: a request passes when its value is not below
      the target.

    Goodput is the number of requests passing every constraint divided
    by the benchmark duration.
    """

    def __init__(
        self,
        goodput_constraints: Dict[str, float],
        metric: Union[LLMMetrics, Metrics],
        benchmark_duration: float,
    ) -> None:
        """
        Args:
            goodput_constraints: Target value keyed by metric name.
                Time-related targets are scaled by MS_TO_NS_CONVERSION
                before being compared with the recorded values.
            metric: Metrics object providing `request_time_metrics`,
                `request_throughput_metrics` and `data`.
            benchmark_duration: Divisor used to turn the good request
                count into goodput.
        """
        super().__init__(goodput_constraints, metric, benchmark_duration)

        self._set_valid_metric_names()

        self._has_time_target = False
        self._has_throughput_target = False

        self._add_slo_mapping()

    def _set_valid_metric_names(self) -> None:
        """Cache the metric names that are accepted as constraints."""
        self._valid_time_related_names = [
            item.name for item in self._metric.request_time_metrics
        ]
        self._valid_throughput_related_names = [
            item.name for item in self._metric.request_throughput_metrics
        ]
        self._valid_metric_names = (
            self._valid_time_related_names + self._valid_throughput_related_names
        )

    def _add_slo_mapping(self) -> None:
        """Register the plural names of the LLM-specific metrics."""
        self._slo_names["time_to_first_token"] = "time_to_first_tokens"
        self._slo_names["inter_token_latency"] = "inter_token_latencies"
        self._slo_names["output_token_throughput_per_request"] = (
            "output_token_throughputs_per_request"
        )

    def _set_valid_slos(self) -> None:
        """
        Sort the user's constraints into time-related and
        throughput-related targets.

        If any constraint name is unknown, the invalid names are logged
        and the goodput is set to INVALID_GOODPUT, which makes the
        remaining pipeline steps return early.
        """
        invalid_slos = []
        self._valid_time_related_slos = {}
        self._valid_throughput_related_slos = {}
        # Start from a clean state so the flags always describe the
        # dictionaries rebuilt just above.
        self._has_time_target = False
        self._has_throughput_target = False

        for slo_name, slo_value in self._goodput_constraints.items():
            if slo_name in self._valid_time_related_names:
                self._valid_time_related_slos[slo_name] = (
                    slo_value * self.MS_TO_NS_CONVERSION
                )
                self._has_time_target = True
            elif slo_name in self._valid_throughput_related_names:
                self._valid_throughput_related_slos[slo_name] = slo_value
                self._has_throughput_target = True
            else:
                invalid_slos.append(slo_name)

        if invalid_slos:
            valid_slos_list = ", ".join(self._valid_metric_names)
            logger.info(
                f"Invalid Service Level Objectives found: {', '.join(invalid_slos)}. "
                f"Valid Service Level Objectives are: {valid_slos_list}."
            )
            self._goodput_val = self.INVALID_GOODPUT

    def _combine_requests_metric_values(self) -> None:
        """
        Build one tuple of metric values per request for each target
        group, ordered like the corresponding constraint dictionary.
        """
        if self.goodput == self.INVALID_GOODPUT:
            return

        if self._has_time_target:
            time_names = [
                self.get_slo_name(key) for key in self._valid_time_related_slos
            ]
            requests_time_metric_values = [
                self._metric.data[name] for name in time_names
            ]

            # Transpose: one list per metric -> one tuple per request.
            self._combined_requests_time_metric_values = list(
                zip(*requests_time_metric_values)
            )

        if self._has_throughput_target:
            throughput_names = [
                self.get_slo_name(key) for key in self._valid_throughput_related_slos
            ]
            requests_throughput_metric_values = [
                self._metric.data[name] for name in throughput_names
            ]

            self._combined_requests_throughput_metric_values = list(
                zip(*requests_throughput_metric_values)
            )

    def _count_good_reqs(self) -> Optional[int]:
        """
        Count the requests that meet every valid constraint.

        Returns:
            None when the goodput has been marked invalid; otherwise the
            number of good requests. When no constraint is set there is
            nothing to evaluate and 0 is returned.
        """
        if self.goodput == self.INVALID_GOODPUT:
            return None

        # Default to zero so an empty constraint set yields a count of 0
        # instead of an UnboundLocalError in the loop below.
        num_of_requests = 0
        target_time_metric_values: List[float] = []
        target_throughput_metric_values: List[float] = []
        if self._has_time_target:
            num_of_requests = len(self._combined_requests_time_metric_values)
            target_time_metric_values = list(self._valid_time_related_slos.values())
        if self._has_throughput_target:
            num_of_requests = len(self._combined_requests_throughput_metric_values)
            target_throughput_metric_values = list(
                self._valid_throughput_related_slos.values()
            )

        good_req_count = 0
        for idx in range(num_of_requests):
            # A time value above its target disqualifies the request.
            if self._has_time_target and any(
                val > slo
                for val, slo in zip(
                    self._combined_requests_time_metric_values[idx],
                    target_time_metric_values,
                )
            ):
                continue

            # A throughput value below its target disqualifies the request.
            if self._has_throughput_target and any(
                val < slo
                for val, slo in zip(
                    self._combined_requests_throughput_metric_values[idx],
                    target_throughput_metric_values,
                )
            ):
                continue

            good_req_count += 1

        return good_req_count

    def _compute_goodput(self, good_count) -> None:
        """
        Store the goodput as a single-element list holding
        `good_count / benchmark_duration`, unless the goodput has
        already been marked invalid.
        """
        if self.goodput == self.INVALID_GOODPUT:
            return

        self._goodput_val = [good_count / self._benchmark_duration]

genai-perf/genai_perf/logging.py

+5
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,11 @@ def init_logging() -> None:
9090
"level": "DEBUG",
9191
"propagate": False,
9292
},
93+
"genai_perf.goodput_calculator.llm_goodput_calculator": {
94+
"handlers": ["console"],
95+
"level": "DEBUG",
96+
"propagate": False,
97+
},
9398
},
9499
}
95100
logging.config.dictConfig(LOGGING_CONFIG)

genai-perf/genai_perf/main.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -98,13 +98,17 @@ def generate_inputs(args: Namespace, tokenizer: Tokenizer) -> None:
9898

9999
def calculate_metrics(args: Namespace, tokenizer: Tokenizer) -> ProfileDataParser:
    """
    Create the profile data parser that matches the endpoint type.

    "embeddings" and "rankings" endpoints use ProfileDataParser,
    "image_retrieval" uses ImageRetrievalProfileDataParser, and every
    other endpoint type uses LLMProfileDataParser together with the
    tokenizer. The goodput constraints from `args.goodput` are passed to
    all parsers except the image retrieval one.
    """
    endpoint = args.endpoint_type

    if endpoint in ["embeddings", "rankings"]:
        return ProfileDataParser(
            args.profile_export_file,
            goodput_constraints=args.goodput,
        )

    if endpoint == "image_retrieval":
        return ImageRetrievalProfileDataParser(args.profile_export_file)

    return LLMProfileDataParser(
        filename=args.profile_export_file,
        tokenizer=tokenizer,
        goodput_constraints=args.goodput,
    )
109113

110114

0 commit comments

Comments
 (0)