triton-inference-server · AndyDai-nv · Aug 23, 2024 · Aug 7, 2024 · Aug 7, 2024 · Aug 8, 2024
diff --git a/genai-perf/genai_perf/export_data/console_exporter.py b/genai-perf/genai_perf/export_data/console_exporter.py
@@ -84,6 +84,9 @@ def _construct_table(self, table: Table) -> None:
         for metric in self._metrics.system_metrics:
             metric_str = metric.name.replace("_", " ").capitalize()
             # metric_str = metric_str.replace("throughput", "tput")
+            if metric.name == "request_goodput":
+                if not self._args.goodput:
+                    continue
             metric_str += f" ({metric.unit})" if metric.unit != "tokens" else ""
             row_values = [metric_str]
             for stat in self.STAT_COLUMN_KEYS:

diff --git a/genai-perf/genai_perf/export_data/csv_exporter.py b/genai-perf/genai_perf/export_data/csv_exporter.py
@@ -94,6 +94,9 @@ def _write_system_metrics(self, csv_writer) -> None:
         for metric in self._metrics.system_metrics:
             metric_str = metric.name.replace("_", " ").title()
             metric_str += f" ({metric.unit})"
+            if metric.name == "request_goodput":
+                if not self._args.goodput:
+                    continue
             value = self._stats[metric.name]["avg"]
             csv_writer.writerow([metric_str, f"{value:.2f}"])
 

diff --git a/genai-perf/genai_perf/goodput_calculator/__init__.py b/genai-perf/genai_perf/goodput_calculator/__init__.py
@@ -0,0 +1,28 @@
+# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from genai_perf.goodput_calculator.goodput_calculator import GoodputCalculator
+from genai_perf.goodput_calculator.llm_goodput_calculator import LLMGoodputCalculator
diff --git a/genai-perf/genai_perf/goodput_calculator/goodput_calculator.py b/genai-perf/genai_perf/goodput_calculator/goodput_calculator.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+
+# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+from abc import ABC, abstractmethod
+from typing import Dict, List, Optional
+
+from genai_perf.metrics import Metrics
+
+
+class GoodputCalculator(ABC):
+    """Abstract template for turning goodput constraints into a goodput value."""
+
+    # Factor the subclasses use to convert millisecond SLOs to nanoseconds.
+    MS_TO_NS_CONVERSION = 1e6
+    # Sentinel stored as the goodput when invalid constraints are found.
+    INVALID_GOODPUT = [-1.0]
+
+    def __init__(
+        self,
+        goodput_constraints: Dict[str, float],
+        metric: Metrics,
+        benchmark_duration: float,
+    ) -> None:
+        self._metric = metric
+        self._benchmark_duration = benchmark_duration
+        self._goodput_constraints = goodput_constraints
+        # Singular metric name -> plural name; subclasses add their own entries.
+        self._slo_names = {
+            "request_latency": "request_latencies",
+        }
+        # Goodput: completed requests per second that meet every SLO.
+        # Stays None until a subclass stores a value.
+        self._goodput: Optional[List[float]] = None
+
+    def compute(self) -> None:
+        """
+        Run the goodput pipeline and store the result in `goodput`.
+
+        Steps, in order: validate the user constraints, gather the matching
+        per-request metric values, count the requests that satisfy every
+        constraint, then derive the final goodput from that count.
+        """
+        self._set_valid_slos()
+        self._combine_requests_metric_values()
+        self._compute_goodput(self._count_good_reqs())
+
+    @abstractmethod
+    def _set_valid_slos(self) -> None:
+        """Keep the valid goodput constraints and log the invalid ones."""
+        pass
+
+    @abstractmethod
+    def _combine_requests_metric_values(self) -> None:
+        """Group the constrained metrics' values on a per-request basis."""
+        pass
+
+    @abstractmethod
+    def _count_good_reqs(self) -> Optional[int]:
+        """Return how many requests satisfy the goodput constraints."""
+        pass
+
+    @abstractmethod
+    def _compute_goodput(self, good_count) -> None:
+        """Derive the goodput from the number of good requests."""
+        pass
+
+    @property
+    def goodput(self) -> Optional[List[float]]:
+        """The stored goodput, or None when nothing has been stored yet."""
+        return self._goodput
+
+    def get_slo_name(self, metric_name: str) -> str:
+        """Return the registered plural name of a metric, or raise KeyError."""
+        try:
+            return self._slo_names[metric_name]
+        except KeyError:
+            raise KeyError(f"No metric named '{metric_name}' exists.") from None
diff --git a/genai-perf/genai_perf/goodput_calculator/llm_goodput_calculator.py b/genai-perf/genai_perf/goodput_calculator/llm_goodput_calculator.py
@@ -0,0 +1,179 @@
+#!/usr/bin/env python3
+
+# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from typing import Dict, List, Optional, Union
+
+import genai_perf.logging as logging
+from genai_perf.goodput_calculator.goodput_calculator import GoodputCalculator
+from genai_perf.metrics.llm_metrics import LLMMetrics
+from genai_perf.metrics.metrics import Metrics
+
+logger = logging.getLogger(__name__)
+
+
+class LLMGoodputCalculator(GoodputCalculator):
+    """
+    Goodput calculator for LLM-related goodput constraints.
+
+    Time constraints are upper bounds and throughput constraints are lower
+    bounds; a request is good only when it meets every one of them.
+    """
+
+    def __init__(
+        self,
+        goodput_constraints: Dict[str, float],
+        metric: Union[LLMMetrics, Metrics],
+        benchmark_duration: float,
+    ) -> None:
+        super().__init__(goodput_constraints, metric, benchmark_duration)
+
+        self._set_valid_metric_names()
+
+        self._has_time_target = False
+        self._has_throughput_target = False
+
+        self._add_slo_mapping()
+
+    def _set_valid_metric_names(self) -> None:
+        """Collect the metric names that are accepted as goodput constraints."""
+        self._valid_time_related_names = [
+            item.name for item in self._metric.request_time_metrics
+        ]
+        self._valid_throughput_related_names = [
+            item.name for item in self._metric.request_throughput_metrics
+        ]
+        self._valid_metric_names = (
+            self._valid_time_related_names + self._valid_throughput_related_names
+        )
+
+    def _add_slo_mapping(self) -> None:
+        """Register the plural names of the LLM-specific metrics."""
+        self._slo_names["time_to_first_token"] = "time_to_first_tokens"
+        self._slo_names["inter_token_latency"] = "inter_token_latencies"
+        self._slo_names["output_token_throughput_per_request"] = (
+            "output_token_throughputs_per_request"
+        )
+
+    def _set_valid_slos(self) -> None:
+        """
+        Split the constraints into time-related and throughput-related SLOs.
+
+        Unknown names are logged, and the goodput is marked invalid when any
+        name is unknown or when no usable constraint was supplied at all.
+        """
+        invalid_slos = []
+        self._valid_time_related_slos = {}
+        self._valid_throughput_related_slos = {}
+        for slo_name, slo_value in self._goodput_constraints.items():
+            if slo_name in self._valid_time_related_names:
+                # Time SLOs are scaled by the ms -> ns factor before comparison.
+                self._valid_time_related_slos[slo_name] = (
+                    slo_value * self.MS_TO_NS_CONVERSION
+                )
+                self._has_time_target = True
+            elif slo_name in self._valid_throughput_related_names:
+                self._valid_throughput_related_slos[slo_name] = slo_value
+                self._has_throughput_target = True
+            else:
+                invalid_slos.append(slo_name)
+
+        if invalid_slos:
+            valid_slos_list = ", ".join(self._valid_metric_names)
+            logger.info(
+                f"Invalid Service Level Objectives found: {', '.join(invalid_slos)}. "
+                f"Valid Service Level Objectives are: {valid_slos_list}."
+            )
+        if invalid_slos or not (self._has_time_target or self._has_throughput_target):
+            # Store a copy so callers cannot mutate the shared class sentinel.
+            self._goodput = list(self.INVALID_GOODPUT)
+
+    def _combine_requests_metric_values(self) -> None:
+        """Transpose the constrained metrics into one tuple of values per request."""
+        if self.goodput == self.INVALID_GOODPUT:
+            return
+
+        if self._has_time_target:
+            self._combined_requests_time_metric_values = self._rows_for(
+                self._valid_time_related_slos
+            )
+        if self._has_throughput_target:
+            self._combined_requests_throughput_metric_values = self._rows_for(
+                self._valid_throughput_related_slos
+            )
+
+    def _rows_for(self, slos: Dict[str, float]) -> List[tuple]:
+        """Return per-request value tuples ordered like the keys of `slos`."""
+        columns = [self._metric.data[self.get_slo_name(key)] for key in slos]
+        # zip stops at the shortest column, so every row is complete.
+        return list(zip(*columns))
+
+    def _count_good_reqs(self) -> Optional[int]:
+        """
+        Count the requests that satisfy every valid constraint.
+
+        Returns None when the goodput has been marked invalid. A request is
+        good when no time value exceeds its SLO and no throughput value falls
+        below its SLO.
+        """
+        if self.goodput == self.INVALID_GOODPUT:
+            return None
+
+        time_rows: Optional[List[tuple]] = None
+        throughput_rows: Optional[List[tuple]] = None
+        time_slos: List[float] = []
+        throughput_slos: List[float] = []
+        if self._has_time_target:
+            time_rows = self._combined_requests_time_metric_values
+            time_slos = list(self._valid_time_related_slos.values())
+        if self._has_throughput_target:
+            throughput_rows = self._combined_requests_throughput_metric_values
+            throughput_slos = list(self._valid_throughput_related_slos.values())
+
+        # Judge only requests present in every constrained metric. With no
+        # target there is nothing to iterate (avoids an unbound request count).
+        row_sets = [rows for rows in (time_rows, throughput_rows) if rows is not None]
+        num_of_requests = min(map(len, row_sets), default=0)
+
+        good_req_count = 0
+        for idx in range(num_of_requests):
+            too_slow = time_rows is not None and any(
+                val > slo for val, slo in zip(time_rows[idx], time_slos)
+            )
+            too_low = throughput_rows is not None and any(
+                val < slo for val, slo in zip(throughput_rows[idx], throughput_slos)
+            )
+            if not (too_slow or too_low):
+                good_req_count += 1
+
+        return good_req_count
+
+    def _compute_goodput(self, good_count) -> None:
+        """Store `good_count` divided by the benchmark duration as the goodput."""
+        if self.goodput == self.INVALID_GOODPUT:
+            return
+        self._goodput = [good_count / self._benchmark_duration]
diff --git a/genai-perf/genai_perf/logging.py b/genai-perf/genai_perf/logging.py
@@ -90,6 +90,11 @@ def init_logging() -> None:
                 "level": "DEBUG",
                 "propagate": False,
             },
+            "genai_perf.goodput_calculator.llm_goodput_calculator": {
+                "handlers": ["console"],
+                "level": "DEBUG",
+                "propagate": False,
+            },
         },
     }
     logging.config.dictConfig(LOGGING_CONFIG)

diff --git a/genai-perf/genai_perf/main.py b/genai-perf/genai_perf/main.py
@@ -98,13 +98,17 @@ def generate_inputs(args: Namespace, tokenizer: Tokenizer) -> None:
 
 def calculate_metrics(args: Namespace, tokenizer: Tokenizer) -> ProfileDataParser:
     if args.endpoint_type in ["embeddings", "rankings"]:
-        return ProfileDataParser(args.profile_export_file)
+        return ProfileDataParser(
+            args.profile_export_file,
+            goodput_constraints=args.goodput,
+        )
     elif args.endpoint_type == "image_retrieval":
         return ImageRetrievalProfileDataParser(args.profile_export_file)
     else:
         return LLMProfileDataParser(
             filename=args.profile_export_file,
             tokenizer=tokenizer,
+            goodput_constraints=args.goodput,
         )