Skip to content

Commit 23679a4

Browse files
committed
skip first and last in metrics calculation
1 parent 3f0c752 commit 23679a4

File tree

3 files changed

+54
-31
lines changed

3 files changed

+54
-31
lines changed

dags/map_reproducibility/utils/common_utils.py

+32-18
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,22 @@
3939
MAX_TFLOP = {"a3ultra": 989, "a3mega": 989, "a4": 2237}
4040

4141

42+
class Config:
    """Lightweight namespace-style configuration container.

    Keyword arguments passed to the constructor become instance
    attributes, so configuration values can be read with dot notation
    (``cfg.foo``) instead of dictionary indexing.
    """

    def __init__(self, **kwargs):
        # Store every keyword argument directly as an instance attribute.
        for key, value in kwargs.items():
            setattr(self, key, value)

    def __repr__(self):
        # Mirror the underlying attribute dict for easy debugging.
        return repr(vars(self))

    def __str__(self):
        return str(vars(self))
56+
57+
4258
# This is required to get auth to access
4359
def git_cookie_authdaemon():
4460
auth_cmds = (
@@ -401,13 +417,27 @@ def copy_bucket_cmds_maxtext(tmpdir, bucket_name=BUCKET_NAME):
401417
return cmds
402418

403419

404-
def calculate_maxtext_metrics(log_location: str, hardware: str = "a3ultra"):
420+
def get_profiler_skip_steps(
    config: "Config",
    default_dump_hlo: int = 1,
    default_profiler_steps: int = 5,
):
  """Compute how many leading steps the profiler should skip.

  The skip count is the sum of two optional attributes on ``config``:
  ``dump_hlo`` and ``profiler_steps``. A missing attribute falls back to
  the corresponding default, which preserves the previously hard-coded
  values (1 and 5).

  Args:
    config: Configuration object; attributes are read via ``getattr``.
    default_dump_hlo: Fallback used when ``config.dump_hlo`` is absent.
    default_profiler_steps: Fallback used when ``config.profiler_steps``
      is absent.

  Returns:
    The total number of initial steps to skip.
  """
  # NOTE(review): ``dump_hlo`` reads like it could be a boolean flag in
  # some configs; if it holds True/False, bool arithmetic still yields
  # 1/0 here — confirm that is the intended contribution.
  base_skip_steps = getattr(config, "dump_hlo", default_dump_hlo)
  additional_skip_steps = getattr(
      config, "profiler_steps", default_profiler_steps
  )
  return base_skip_steps + additional_skip_steps
425+
426+
427+
def calculate_maxtext_metrics(
428+
log_location: str, hardware: str = "a3ultra", skip_first=2, skip_last=2
429+
):
405430
metrics, _ = metric.read_from_tb(log_location, None, None)
406431

407432
print(f"metrics - {metrics}")
408433
step_time_metrics = metrics["perf/step_time_seconds"]
434+
435+
# Apply skip_first and skip_last when aggregating
409436
avg_step_time = metric.aggregate_metrics(
410-
step_time_metrics, metric_config.AggregationStrategy.AVERAGE
437+
step_time_metrics[skip_first:-skip_last]
438+
if skip_last > 0
439+
else step_time_metrics[skip_first:],
440+
metric_config.AggregationStrategy.AVERAGE,
411441
)
412442

413443
tflop_per_device_per_sec_metrics = metrics["perf/per_device_tflops_per_sec"]
@@ -707,22 +737,6 @@ def get_two_node_cmds(hypercomputer: str = "a3ultra"):
707737
return cmd
708738

709739

710-
class Config:
711-
"""
712-
A simple configuration class that allows dot notation access
713-
to dictionary keys.
714-
"""
715-
716-
def __init__(self, **kwargs):
717-
self.__dict__.update(kwargs)
718-
719-
def __repr__(self):
720-
return repr(self.__dict__)
721-
722-
def __str__(self):
723-
return str(self.__dict__)
724-
725-
726740
def parse_internal_config_filename(filename, config=None):
727741
"""
728742
Parse configuration values embedded in the filename.

dags/map_reproducibility/utils/internal_aotc_workload.py

+11-7
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@
3434
from dags.map_reproducibility.utils.common_utils import get_bq_writer_path
3535
from dags.map_reproducibility.utils.common_utils import get_recipe_repo_path, get_internal_recipe_repo_path
3636
from dags.map_reproducibility.utils.common_utils import get_cluster
37-
from dags.map_reproducibility.utils.common_utils import calculate_maxtext_metrics
37+
from dags.map_reproducibility.utils.common_utils import calculate_maxtext_metrics, get_profiler_skip_steps
3838
from dags.map_reproducibility.utils.common_utils import copy_bucket_cmds_maxtext, get_job_gcs_bucket_folder
3939
from dags.map_reproducibility.utils.common_utils import parse_internal_config_filename
4040
from dags.map_reproducibility.utils.common_utils import parse_internal_config_content
@@ -158,12 +158,6 @@ def run_internal_aotc_workload(
158158

159159
log_location = os.path.join(tmpdir, "tflog/metrics")
160160

161-
mfu, step_time = calculate_maxtext_metrics(
162-
log_location, config.HYPERCOMPUTER
163-
)
164-
165-
print(f"mfu: {mfu}")
166-
print(f"step_time: {step_time}")
167161
comment = (
168162
"internal recipes regression tests"
169163
if not backfill
@@ -173,6 +167,16 @@ def run_internal_aotc_workload(
173167
gcs_bucket = get_job_gcs_bucket_folder(job_name)
174168
print(f"GCS bucket is {gcs_bucket}")
175169

170+
# calculate mfu based on the config
171+
skip_first_n_steps_for_profiler = get_profiler_skip_steps(config)
172+
mfu, step_time = calculate_maxtext_metrics(
173+
log_location,
174+
config.HYPERCOMPUTER,
175+
skip_first=skip_first_n_steps_for_profiler,
176+
)
177+
print(f"mfu: {mfu}")
178+
print(f"step_time: {step_time}")
179+
176180
write_run(
177181
model_id=config.HELM_NAME_MODEL_ID,
178182
hardware_id=config.HYPERCOMPUTER,

dags/map_reproducibility/utils/sample_workload_utils.py

+11-6
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,7 @@
3838
parse_internal_config_content,
3939
get_patheon_job_link,
4040
find_xprof_gcs_path,
41+
get_profiler_skip_steps,
4142
)
4243

4344
from dags.map_reproducibility.utils.benchmarkdb_utils import write_run
@@ -301,12 +302,6 @@ def run_internal_sample_aotc_workload(
301302
bq_writer_repo_root = get_bq_writer_path(tmpdir)
302303
log_location = os.path.join(tmpdir, "tflog/metrics")
303304

304-
mfu, step_time = calculate_maxtext_metrics(
305-
log_location, config.HYPERCOMPUTER
306-
)
307-
308-
print(f"mfu: {mfu}")
309-
print(f"step_time: {step_time}")
310305
comment = "sample benchmarking run"
311306
gcs_bucket = get_job_gcs_bucket_folder(
312307
job_name, bucket_name=sample_run_bucket_name
@@ -329,6 +324,16 @@ def run_internal_sample_aotc_workload(
329324
f"Profile command failed with error: {profiler_error_message}"
330325
)
331326

327+
# calculate mfu based on the config
328+
skip_first_n_steps_for_profiler = get_profiler_skip_steps(config)
329+
mfu, step_time = calculate_maxtext_metrics(
330+
log_location,
331+
config.HYPERCOMPUTER,
332+
skip_first=skip_first_n_steps_for_profiler,
333+
)
334+
print(f"mfu: {mfu}")
335+
print(f"step_time: {step_time}")
336+
332337
write_run(
333338
model_id=config.HELM_NAME_MODEL_ID,
334339
hardware_id=config.HYPERCOMPUTER,

0 commit comments

Comments
 (0)