-
Notifications
You must be signed in to change notification settings - Fork 21
Goodput initial implementation #32
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 13 commits
4b132a4
22eaaea
8f5f189
5d01561
5f46340
baa1d6d
bd48def
442b77b
d154afb
cfa1de2
88678b3
7ccbbc2
3e20eac
beef601
73c770b
9636153
2b3f267
be076d7
8d7abfc
b97b668
0232c08
f0e9faa
b170345
222d1dd
c34f9db
afa2784
a3ab155
274a0f2
f3c509f
9eaaa81
994b5b4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
# | ||
# Redistribution and use in source and binary forms, with or without | ||
# modification, are permitted provided that the following conditions | ||
# are met: | ||
# * Redistributions of source code must retain the above copyright | ||
# notice, this list of conditions and the following disclaimer. | ||
# * Redistributions in binary form must reproduce the above copyright | ||
# notice, this list of conditions and the following disclaimer in the | ||
# documentation and/or other materials provided with the distribution. | ||
# * Neither the name of NVIDIA CORPORATION nor the names of its | ||
# contributors may be used to endorse or promote products derived | ||
# from this software without specific prior written permission. | ||
# | ||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY | ||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | ||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR | ||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | ||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
|
||
from genai_perf.goodput_calculator.goodput_calculator import GoodputCalculator | ||
from genai_perf.goodput_calculator.llm_goodput_calculator import LLMGoodputCalculator |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
#!/usr/bin/env python3 | ||
|
||
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
# | ||
# Redistribution and use in source and binary forms, with or without | ||
# modification, are permitted provided that the following conditions | ||
# are met: | ||
# * Redistributions of source code must retain the above copyright | ||
# notice, this list of conditions and the following disclaimer. | ||
# * Redistributions in binary form must reproduce the above copyright | ||
# notice, this list of conditions and the following disclaimer in the | ||
# documentation and/or other materials provided with the distribution. | ||
# * Neither the name of NVIDIA CORPORATION nor the names of its | ||
# contributors may be used to endorse or promote products derived | ||
# from this software without specific prior written permission. | ||
# | ||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY | ||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | ||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR | ||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | ||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
|
||
|
||
from abc import ABC, abstractmethod | ||
from typing import Dict, List, Optional | ||
|
||
from genai_perf.metrics import Metrics | ||
|
||
|
||
class GoodputCalculator(ABC):
    """Abstract base class for computing goodput against user-supplied SLOs.

    ``compute()`` drives four hooks that subclasses must implement, in order:
    validate the constraints, gather per-request metric values, count the
    requests that satisfy the constraints, and derive the goodput.
    """

    # Multiplier that converts a value in milliseconds to nanoseconds.
    MS_TO_NS_CONVERSION = 1e6
    # Sentinel result for a goodput that could not be computed.
    INVALID_GOODPUT = [-1]

    def __init__(
        self,
        goodput_constraints: Dict[str, float],
        metric: Metrics,
        benchmark_duration: float,
    ) -> None:
        """Store the inputs needed to compute goodput.

        Args:
            goodput_constraints: Maps a (singular) metric name to its target
                value.
            metric: Metrics object holding the measured data.
            benchmark_duration: Duration of the benchmark run (unit is not
                visible here; presumably seconds -- confirm with the caller).
        """
        self._goodput_constraints = goodput_constraints
        self._benchmark_duration = benchmark_duration
        self._metric = metric
        # Stays None until a subclass stores a result in _compute_goodput().
        self._goodput: Optional[List[float]] = None
        # Singular name accepted from users -> plural name used to look up
        # the stored data. Subclasses may register additional entries.
        self._slo_base_names = {
            "request_latency": "request_latencies",
        }

    def compute(self) -> None:
        """
        Compute the goodput result.

        The GoodputCalculator class sets valid SLOs from users' input, aggregates
        request metric values, counts the number of good requests, and calculates
        the final goodput.
        """
        self._set_valid_slos()
        self._combine_requests_metric_values()
        good_count = self._count_good_reqs()
        self._compute_goodput(good_count)

    @abstractmethod
    def _set_valid_slos(self) -> None:
        """
        Check users' Service Level Objectives (SLOs) inputs.
        Set the valid ones while logging the invalid ones.
        """

    @abstractmethod
    def _combine_requests_metric_values(self) -> None:
        """
        Combine values from the metrics that match with the valid SLOs at a
        per request level.
        """

    @abstractmethod
    def _count_good_reqs(self) -> Optional[int]:
        """Count the number of good requests according to SLOs."""

    @abstractmethod
    def _compute_goodput(self, good_count: Optional[int]) -> None:
        """Compute the goodput from the number of good requests."""

    @property
    def goodput(self) -> Optional[List[float]]:
        """The computed goodput, or ``None`` if nothing has been stored yet."""
        return self._goodput

    def get_slo_base_name(self, metric_name: str) -> str:
        """Return the plural name registered for a given metric.

        Args:
            metric_name: Singular metric name as supplied by the user.

        Raises:
            KeyError: If ``metric_name`` has no registered plural name.
        """
        try:
            return self._slo_base_names[metric_name]
        except KeyError:
            # Re-raise with a readable message; the bare key adds nothing.
            raise KeyError(f"No metric named '{metric_name}' exists.") from None
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,183 @@ | ||||||
#!/usr/bin/env python3 | ||||||
|
||||||
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||||||
# | ||||||
# Redistribution and use in source and binary forms, with or without | ||||||
# modification, are permitted provided that the following conditions | ||||||
# are met: | ||||||
# * Redistributions of source code must retain the above copyright | ||||||
# notice, this list of conditions and the following disclaimer. | ||||||
# * Redistributions in binary form must reproduce the above copyright | ||||||
# notice, this list of conditions and the following disclaimer in the | ||||||
# documentation and/or other materials provided with the distribution. | ||||||
# * Neither the name of NVIDIA CORPORATION nor the names of its | ||||||
# contributors may be used to endorse or promote products derived | ||||||
# from this software without specific prior written permission. | ||||||
# | ||||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY | ||||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | ||||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR | ||||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | ||||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||||
|
||||||
from typing import Dict, Optional, Union | ||||||
|
||||||
import genai_perf.logging as logging | ||||||
from genai_perf.goodput_calculator.goodput_calculator import GoodputCalculator | ||||||
from genai_perf.metrics.llm_metrics import LLMMetrics | ||||||
from genai_perf.metrics.metrics import Metrics | ||||||
|
||||||
logger = logging.getLogger(__name__) | ||||||
|
||||||
|
||||||
class LLMGoodputCalculator(GoodputCalculator):
    """
    A subclass to calculate goodput for LLMs according to LLM-related SLOs.

    A request counts as good when none of its time metrics exceeds its target
    and none of its throughput metrics falls below its target.
    """

    def __init__(
        self,
        goodput_constraints: Dict[str, float],
        metric: Union[LLMMetrics, Metrics],
        benchmark_duration: float,
    ) -> None:
        """Collect the metric names that may be used as constraints.

        Args:
            goodput_constraints: Maps a (singular) metric name to its target
                value.
            metric: Metrics object; its ``request_time_metrics`` and
                ``request_throughput_metrics`` entries supply the accepted
                constraint names.
            benchmark_duration: Duration of the benchmark run; the good
                request count is divided by it to obtain goodput.
        """
        super().__init__(goodput_constraints, metric, benchmark_duration)
        self._valid_time_related_names = [
            item.name for item in metric.request_time_metrics
        ]
        self._valid_throughput_related_names = [
            item.name for item in metric.request_throughput_metrics
        ]
        self._valid_metric_names = (
            self._valid_time_related_names + self._valid_throughput_related_names
        )
        self._has_time_target = False
        self._has_throughput_target = False

        # Register the LLM-specific singular -> plural name mappings on top
        # of the ones provided by the base class.
        self._slo_base_names.update(
            {
                "time_to_first_token": "time_to_first_tokens",
                "inter_token_latency": "inter_token_latencies",
                "output_token_throughput_per_request": (
                    "output_token_throughputs_per_request"
                ),
            }
        )

    def _set_valid_slos(self) -> None:
        """
        Check users' Service Level Objectives (SLOs) inputs.
        Set the valid ones while logging the invalid ones.

        If any invalid name is found, the goodput is set to INVALID_GOODPUT
        and the remaining computation steps become no-ops.
        """
        invalid_slos = []
        self._valid_time_related_slos = {}
        self._valid_throughput_related_slos = {}
        for slo_name, slo_value in self._goodput_constraints.items():
            if slo_name in self._valid_time_related_names:
                # Time targets are scaled from ms to ns so they can be compared
                # with the stored metric values (assumed to be in ns -- confirm).
                self._valid_time_related_slos[slo_name] = (
                    slo_value * self.MS_TO_NS_CONVERSION
                )
            elif slo_name in self._valid_throughput_related_names:
                self._valid_throughput_related_slos[slo_name] = slo_value
            else:
                invalid_slos.append(slo_name)

        # Derive the flags from the dicts so a repeated call cannot leave a
        # stale True behind.
        self._has_time_target = bool(self._valid_time_related_slos)
        self._has_throughput_target = bool(self._valid_throughput_related_slos)

        if invalid_slos:
            valid_slos_list = ", ".join(self._valid_metric_names)
            logger.info(
                f"Invalid SLOs found: {', '.join(invalid_slos)}. "
                f"The goodput will be -1. Valid SLOs are: {valid_slos_list}."
            )
            self._goodput = self.INVALID_GOODPUT

    def _combine_requests_metric_values(self) -> None:
        """
        Combine values from the metrics that match with the valid SLOs at a
        per request level.

        Each combined entry is a tuple holding one request's values, ordered
        like the corresponding dict of valid SLOs.
        """
        if self.goodput == self.INVALID_GOODPUT:
            return

        if self._has_time_target:
            time_names = [
                self.get_slo_base_name(key) for key in self._valid_time_related_slos
            ]
            requests_time_metric_values = [
                self._metric.data[name] for name in time_names
            ]
            # Transpose per-metric lists into per-request tuples.
            self._combined_requests_time_metric_values = list(
                zip(*requests_time_metric_values)
            )

        if self._has_throughput_target:
            throughput_names = [
                self.get_slo_base_name(key)
                for key in self._valid_throughput_related_slos
            ]
            requests_throughput_metric_values = [
                self._metric.data[name] for name in throughput_names
            ]
            # Transpose per-metric lists into per-request tuples.
            self._combined_requests_throughput_metric_values = list(
                zip(*requests_throughput_metric_values)
            )

    def _count_good_reqs(self) -> Optional[int]:
        """Count the number of good requests according to SLOs.

        Returns:
            The number of requests meeting every valid SLO, or ``None`` when
            the goodput has already been marked invalid. With no valid SLO at
            all there are no per-request values to inspect, so 0 is returned.
        """
        if self.goodput == self.INVALID_GOODPUT:
            return None

        # Start from 0 so the loop below is well-defined even when neither
        # kind of target is present (previously an UnboundLocalError).
        num_of_requests = 0
        target_time_metric_values = []
        target_throughput_metric_values = []
        if self._has_time_target:
            num_of_requests = len(self._combined_requests_time_metric_values)
            target_time_metric_values = list(self._valid_time_related_slos.values())
        if self._has_throughput_target:
            num_of_requests = len(self._combined_requests_throughput_metric_values)
            target_throughput_metric_values = list(
                self._valid_throughput_related_slos.values()
            )

        good_req_count = 0
        for idx in range(num_of_requests):
            request_time_metric_values = (
                self._combined_requests_time_metric_values[idx]
                if self._has_time_target
                else ()
            )
            request_throughput_metric_values = (
                self._combined_requests_throughput_metric_values[idx]
                if self._has_throughput_target
                else ()
            )

            # A time metric violates its SLO when it is above the target.
            if any(
                val > slo
                for val, slo in zip(
                    request_time_metric_values, target_time_metric_values
                )
            ):
                continue
            # A throughput metric violates its SLO when it is below the target.
            if any(
                val < slo
                for val, slo in zip(
                    request_throughput_metric_values, target_throughput_metric_values
                )
            ):
                continue

            good_req_count += 1

        return good_req_count

    def _compute_goodput(self, good_count) -> None:
        """Compute the goodput as good requests per unit of benchmark duration.

        Leaves the result untouched when it has been marked invalid.
        """
        if self.goodput == self.INVALID_GOODPUT:
            return
        self._goodput = [good_count / self._benchmark_duration]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please go through the code and remove the use of "SLOs". Otherwise, readers who are not already familiar with the term have to figure out what SLOs are. Use a different term, such as "constraints", that is more universally understood.