-
Notifications
You must be signed in to change notification settings - Fork 21
Goodput initial implementation #32
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 27 commits
4b132a4
22eaaea
8f5f189
5d01561
5f46340
baa1d6d
bd48def
442b77b
d154afb
cfa1de2
88678b3
7ccbbc2
3e20eac
beef601
73c770b
9636153
2b3f267
be076d7
8d7abfc
b97b668
0232c08
f0e9faa
b170345
222d1dd
c34f9db
afa2784
a3ab155
274a0f2
f3c509f
9eaaa81
994b5b4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
# | ||
# Redistribution and use in source and binary forms, with or without | ||
# modification, are permitted provided that the following conditions | ||
# are met: | ||
# * Redistributions of source code must retain the above copyright | ||
# notice, this list of conditions and the following disclaimer. | ||
# * Redistributions in binary form must reproduce the above copyright | ||
# notice, this list of conditions and the following disclaimer in the | ||
# documentation and/or other materials provided with the distribution. | ||
# * Neither the name of NVIDIA CORPORATION nor the names of its | ||
# contributors may be used to endorse or promote products derived | ||
# from this software without specific prior written permission. | ||
# | ||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY | ||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | ||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR | ||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | ||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
|
||
from genai_perf.goodput_calculator.goodput_calculator import GoodputCalculator | ||
from genai_perf.goodput_calculator.llm_goodput_calculator import LLMGoodputCalculator |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
#!/usr/bin/env python3 | ||
|
||
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
# | ||
# Redistribution and use in source and binary forms, with or without | ||
# modification, are permitted provided that the following conditions | ||
# are met: | ||
# * Redistributions of source code must retain the above copyright | ||
# notice, this list of conditions and the following disclaimer. | ||
# * Redistributions in binary form must reproduce the above copyright | ||
# notice, this list of conditions and the following disclaimer in the | ||
# documentation and/or other materials provided with the distribution. | ||
# * Neither the name of NVIDIA CORPORATION nor the names of its | ||
# contributors may be used to endorse or promote products derived | ||
# from this software without specific prior written permission. | ||
# | ||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY | ||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | ||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR | ||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | ||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
|
||
|
||
from abc import ABC, abstractmethod | ||
from typing import Dict, List, Optional | ||
|
||
from genai_perf.metrics import Metrics | ||
|
||
|
||
class GoodputCalculator(ABC):
    """Abstract base for computing goodput against goodput constraints.

    Concrete subclasses provide the four pipeline steps that ``compute``
    invokes in order; the result is then exposed via ``goodput``.
    """

    MS_TO_NS_CONVERSION = 1e6
    INVALID_GOODPUT = [-1.0]

    def __init__(
        self,
        goodput_constraints: Dict[str, float],
        metric: Metrics,
        benchmark_duration: float,
    ) -> None:
        self._metric = metric
        self._benchmark_duration = benchmark_duration
        self._goodput_constraints = goodput_constraints
        # Goodput is the number of completed requests per second that meet
        # the Service Level Objectives. It is stored as a list whose only
        # element is the goodput value; per the review discussion this is to
        # stay consistent with how throughput and latency are kept.
        self._goodput: Optional[List[float]] = None

        # Maps a singular constraint name to the plural metric name.
        self._slo_names = {"request_latency": "request_latencies"}

    def compute(self) -> None:
        """
        Run the full goodput pipeline.

        In order: pick out the valid goodput constraints from the user's
        input, gather the matching per-request metric values, count the
        requests that qualify as good, and turn that count into the final
        goodput.
        """
        self._set_valid_slos()
        self._combine_requests_metric_values()
        self._compute_goodput(self._count_good_reqs())

    @abstractmethod
    def _set_valid_slos(self) -> None:
        """Record the valid goodput constraints and log the invalid ones."""

    @abstractmethod
    def _combine_requests_metric_values(self) -> None:
        """
        Group, per request, the values of every metric that corresponds
        to a valid goodput constraint.
        """

    @abstractmethod
    def _count_good_reqs(self) -> Optional[int]:
        """Return how many requests satisfy the goodput constraints."""

    @abstractmethod
    def _compute_goodput(self, good_count) -> None:
        """Derive the goodput from the number of good requests."""

    @property
    def goodput(self) -> Optional[List[float]]:
        """The computed goodput, or ``None`` if nothing has been stored yet."""
        return self._goodput

    def get_slo_name(self, metric_name: str) -> str:
        """Return the plural metric name registered for ``metric_name``.

        Raises:
            KeyError: if ``metric_name`` has no registered plural name.
        """
        if metric_name not in self._slo_names:
            raise KeyError(f"No metric named '{metric_name}' exists.")
        return self._slo_names[metric_name]
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,179 @@ | ||
#!/usr/bin/env python3 | ||
|
||
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
# | ||
# Redistribution and use in source and binary forms, with or without | ||
# modification, are permitted provided that the following conditions | ||
# are met: | ||
# * Redistributions of source code must retain the above copyright | ||
# notice, this list of conditions and the following disclaimer. | ||
# * Redistributions in binary form must reproduce the above copyright | ||
# notice, this list of conditions and the following disclaimer in the | ||
# documentation and/or other materials provided with the distribution. | ||
# * Neither the name of NVIDIA CORPORATION nor the names of its | ||
# contributors may be used to endorse or promote products derived | ||
# from this software without specific prior written permission. | ||
# | ||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY | ||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | ||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR | ||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | ||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
|
||
from typing import Dict, List, Optional, Union | ||
|
||
import genai_perf.logging as logging | ||
from genai_perf.goodput_calculator.goodput_calculator import GoodputCalculator | ||
from genai_perf.metrics.llm_metrics import LLMMetrics | ||
from genai_perf.metrics.metrics import Metrics | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class LLMGoodputCalculator(GoodputCalculator):
    """
    A subclass to calculate goodput for LLMs according to
    LLM-related goodput constraints.

    Constraints are split into two groups based on the metric they name:
    time-related constraints, where a request is good when its value does
    not exceed the target, and throughput-related constraints, where a
    request is good when its value does not fall below the target.
    """

    def __init__(
        self,
        goodput_constraints: Dict[str, float],
        metric: Union[LLMMetrics, Metrics],
        benchmark_duration: float,
    ) -> None:
        """
        Args:
            goodput_constraints: constraint name -> target value.
            metric: metrics object providing ``request_time_metrics``,
                ``request_throughput_metrics`` and a ``data`` mapping.
            benchmark_duration: divisor used to turn the good-request
                count into goodput (presumably seconds - confirm with
                the caller).
        """
        super().__init__(goodput_constraints, metric, benchmark_duration)

        self._set_valid_metric_names()

        self._has_time_target = False
        self._has_throughput_target = False
        # Filled in by _set_valid_slos(); initialised here so the
        # attributes always exist.
        self._valid_time_related_slos: Dict[str, float] = {}
        self._valid_throughput_related_slos: Dict[str, float] = {}

        self._add_slo_mapping()

    def _set_valid_metric_names(self) -> None:
        """Collect the metric names that are accepted as constraint names."""
        self._valid_time_related_names = [
            item.name for item in self._metric.request_time_metrics
        ]
        self._valid_throughput_related_names = [
            item.name for item in self._metric.request_throughput_metrics
        ]
        self._valid_metric_names = (
            self._valid_time_related_names + self._valid_throughput_related_names
        )

    def _add_slo_mapping(self) -> None:
        """Register the LLM-specific singular -> plural metric names."""
        self._slo_names["time_to_first_token"] = "time_to_first_tokens"
        self._slo_names["inter_token_latency"] = "inter_token_latencies"
        self._slo_names["output_token_throughput_per_request"] = (
            "output_token_throughputs_per_request"
        )

    def _set_valid_slos(self) -> None:
        """Set the valid goodput constraints while logging any invalid ones.

        If any constraint name is not a known metric name, the goodput is
        marked invalid and the remaining pipeline steps become no-ops.
        """
        invalid_slos = []
        self._valid_time_related_slos = {}
        self._valid_throughput_related_slos = {}
        for slo_name, slo_value in self._goodput_constraints.items():
            if slo_name in self._valid_time_related_names:
                # Time targets are scaled by the ms -> ns factor.
                self._valid_time_related_slos[slo_name] = (
                    slo_value * self.MS_TO_NS_CONVERSION
                )
            elif slo_name in self._valid_throughput_related_names:
                self._valid_throughput_related_slos[slo_name] = slo_value
            else:
                invalid_slos.append(slo_name)

        # Derive the flags from the freshly rebuilt dicts so they can never
        # go stale relative to them (previously they were only ever set to
        # True and never cleared).
        self._has_time_target = bool(self._valid_time_related_slos)
        self._has_throughput_target = bool(self._valid_throughput_related_slos)

        if invalid_slos:
            valid_slos_list = ", ".join(self._valid_metric_names)
            logger.info(
                f"Invalid Service Level Objectives found: {', '.join(invalid_slos)}. "
                f"Valid Service Level Objectives are: {valid_slos_list}."
            )
            # Store a copy: the goodput is exposed through a public property
            # and must not alias the shared class-level constant.
            self._goodput = list(self.INVALID_GOODPUT)

    def _combine_requests_metric_values(self) -> None:
        """
        Combine values from the metrics that match with the valid
        goodput constraints at a per request level.

        Each combined entry is a tuple holding one request's values, in the
        same order as the corresponding constraint dict.
        """
        if self.goodput == self.INVALID_GOODPUT:
            return

        if self._has_time_target:
            time_names = [
                self.get_slo_name(key) for key in self._valid_time_related_slos
            ]
            requests_time_metric_values = [
                self._metric.data[name] for name in time_names
            ]

            self._combined_requests_time_metric_values = list(
                zip(*requests_time_metric_values)
            )

        if self._has_throughput_target:
            throughput_names = [
                self.get_slo_name(key) for key in self._valid_throughput_related_slos
            ]
            requests_throughput_metric_values = [
                self._metric.data[name] for name in throughput_names
            ]

            self._combined_requests_throughput_metric_values = list(
                zip(*requests_throughput_metric_values)
            )

    def _count_good_reqs(self) -> Optional[int]:
        """Count the number of good requests according to goodput constraints.

        Returns:
            ``None`` when the goodput has been marked invalid, otherwise the
            number of requests meeting every valid constraint. With no valid
            constraint at all there are no rows to evaluate and the count
            is 0.
        """
        if self.goodput == self.INVALID_GOODPUT:
            return None

        target_time_metric_values: List[float] = []
        target_throughput_metric_values: List[float] = []
        row_counts: List[int] = []
        if self._has_time_target:
            row_counts.append(len(self._combined_requests_time_metric_values))
            target_time_metric_values = list(self._valid_time_related_slos.values())
        if self._has_throughput_target:
            row_counts.append(len(self._combined_requests_throughput_metric_values))
            target_throughput_metric_values = list(
                self._valid_throughput_related_slos.values()
            )

        # Only evaluate requests present in every combined list. This keeps
        # the count defined when no target is set (0) and avoids indexing
        # past the end of the shorter list when both kinds are present.
        num_of_requests = min(row_counts, default=0)

        good_req_count = 0
        for idx in range(num_of_requests):
            request_time_metric_values = (
                self._combined_requests_time_metric_values[idx]
                if self._has_time_target
                else ()
            )
            request_throughput_metric_values = (
                self._combined_requests_throughput_metric_values[idx]
                if self._has_throughput_target
                else ()
            )

            # A time-related value above its target disqualifies the request.
            if any(
                val > slo
                for val, slo in zip(
                    request_time_metric_values, target_time_metric_values
                )
            ):
                continue
            # A throughput-related value below its target disqualifies it.
            if any(
                val < slo
                for val, slo in zip(
                    request_throughput_metric_values, target_throughput_metric_values
                )
            ):
                continue

            good_req_count += 1

        return good_req_count

    def _compute_goodput(self, good_count: Optional[int]) -> None:
        """Compute the goodput as good requests divided by benchmark duration.

        Does nothing when the goodput has already been marked invalid. A
        zero ``benchmark_duration`` raises ``ZeroDivisionError``.
        """
        if self.goodput == self.INVALID_GOODPUT:
            return
        self._goodput = [good_count / self._benchmark_duration]
Uh oh!
There was an error while loading. Please reload this page.