
Commit 1d8279d

Reduce qualx logging noise (#1603)
This PR fixes #1535 by reducing logging noise from qualx: most of the noisy log messages are demoted from info/warning to debug level.

Signed-off-by: Lee Yang <[email protected]>
1 parent 9df6fa3 · commit 1d8279d
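For anyone who still wants to see these messages after this change, here is a minimal sketch (not part of this commit) of raising the relevant logger to DEBUG from a calling script. It assumes the qualx modules obtain their loggers under the spark_rapids_tools package hierarchy (e.g. via logging.getLogger(__name__)); the exact logger name below is an assumption for illustration only.

import logging

# Assumed logger name for illustration; qualx modules are expected to log under a
# 'spark_rapids_tools...' hierarchy if they use logging.getLogger(__name__).
logging.basicConfig(format='%(asctime)s %(levelname)s %(name)s: %(message)s')
logging.getLogger('spark_rapids_tools.tools.qualx').setLevel(logging.DEBUG)

With a configuration like this, the messages demoted to logger.debug(...) in this commit would still be emitted, while the default configuration keeps them hidden.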

4 files changed: +30 -29 lines changed

user_tools/src/spark_rapids_tools/tools/qualx/model.py (+5 -5)

@@ -77,7 +77,7 @@ def train(
         drop=True
     )
     if cpu_aug_tbl.shape[0] < original_num_rows:
-        logger.warning(
+        logger.debug(
             'Removed %d rows with NaN label values', original_num_rows - cpu_aug_tbl.shape[0]
         )

@@ -156,7 +156,7 @@ def predict(
     if missing:
         raise ValueError(f'Input is missing model features: {missing}')
     if extra:
-        logger.warning('Input had extra features not present in model: %s', extra)
+        logger.debug('Input had extra features not present in model: %s', extra)

     x = cpu_aug_tbl[model_features]
     y = cpu_aug_tbl[label_col] if label_col else None
@@ -248,7 +248,7 @@ def extract_model_features(
     gpu_aug_tbl = df[df['runType'] == 'GPU']
     if gpu_aug_tbl.shape[0] > 0:
         if gpu_aug_tbl.shape[0] != cpu_aug_tbl.shape[0]:
-            logger.warning(
+            logger.debug(
                 'Number of GPU rows (%d) does not match number of CPU rows (%d)',
                 gpu_aug_tbl.shape[0],
                 cpu_aug_tbl.shape[0],
@@ -276,7 +276,7 @@ def extract_model_features(
         if (
             num_na / num_rows > 0.05
         ): # arbitrary threshold, misaligned sqlIDs still may 'match' most of the time
-            logger.warning(
+            logger.debug(
                 'Percentage of NaN GPU durations is high: %d / %d. Per-sql actual speedups may be inaccurate.',
                 num_na,
                 num_rows,
@@ -315,7 +315,7 @@ def extract_model_features(
         raise ValueError(f'Input data is missing model features: {missing}')
     if extra:
         # remove extra columns
-        logger.warning('Input data has extra features (removed): %s', extra)
+        logger.debug('Input data has extra features (removed): %s', extra)
         feature_cols = [c for c in feature_cols if c not in extra]

     # add train/val/test split column, if split function(s) provided

user_tools/src/spark_rapids_tools/tools/qualx/preprocess.py (+9 -8)

@@ -760,19 +760,19 @@ def impute(full_tbl: pd.DataFrame) -> pd.DataFrame:
     """Impute missing columns and delete extra columns."""
     actual_features = set(full_tbl.columns)
     if actual_features == expected_raw_features:
-        logger.info('Dataset has all expected features')
+        logger.debug('Dataset has all expected features')
     else:
         missing = sorted(expected_raw_features - actual_features)
         extra = sorted(actual_features - expected_raw_features)
         if missing:
-            logger.warning('Imputing missing features: %s', missing)
+            logger.debug('Imputing missing features: %s', missing)
             if 'fraction_supported' in missing:
                 full_tbl['fraction_supported'] = 1.0
                 missing.remove('fraction_supported')
             full_tbl.loc[:, missing] = 0

         if extra:
-            logger.warning('Removing extra features: %s', extra)
+            logger.debug('Removing extra features: %s', extra)
             full_tbl = full_tbl.drop(columns=extra)

     # one last check after modifications (update expected_raw_features if needed)
@@ -806,7 +806,7 @@ def scan_tbl(
         )
     except Exception as ex: # pylint: disable=broad-except
         if warn_on_error or abort_on_error:
-            logger.warning('Failed to load %s for %s.', tb_name, app_id)
+            logger.debug('Failed to load %s for %s.', tb_name, app_id)
         if abort_on_error:
             raise ScanTblError() from ex
         scan_result = pd.DataFrame()
@@ -1030,7 +1030,6 @@ def scan_tbl(
     stage_times = total_stage_time.merge(
         failed_stage_time, on='sqlID', how='inner'
     )
-    stage_times.info()
     sqls_to_drop = set(
         stage_times.loc[
             stage_times.Duration_y
@@ -1039,7 +1038,7 @@ def scan_tbl(
     )

     if sqls_to_drop:
-        logger.warning('Ignoring sqlIDs %s due to excessive failed/cancelled stage duration.', sqls_to_drop)
+        logger.debug('Ignoring sqlIDs %s due to excessive failed/cancelled stage duration.', sqls_to_drop)

     if node_level_supp is not None and (qualtool_filter == 'stage'):
         job_stage_agg_tbl = job_stage_agg_tbl[
@@ -1118,13 +1117,15 @@ def scan_tbl(
     aborted_sql_ids = set()

     if aborted_sql_ids:
-        logger.warning('Ignoring sqlIDs %s due to aborted jobs.', aborted_sql_ids)
+        logger.debug('Ignoring sqlIDs %s due to aborted jobs.', aborted_sql_ids)

     sqls_to_drop = sqls_to_drop.union(aborted_sql_ids)

     if sqls_to_drop:
         logger.warning(
-            'Ignoring a total of %s sqlIDs due to stage/job failures.', len(sqls_to_drop)
+            'Ignoring a total of %s sqlIDs due to stage/job failures for %s.',
+            len(sqls_to_drop),
+            app_id
         )
     app_info_mg = app_info_mg.loc[~app_info_mg.sqlID.isin(sqls_to_drop)]

user_tools/src/spark_rapids_tools/tools/qualx/qualx_main.py (+12 -12)

@@ -146,7 +146,7 @@ def _get_model(platform: str,
         xgb.Booster model file.
     """
     model_path = _get_model_path(platform, model, variant)
-    logger.info('Loading model from: %s', model_path)
+    logger.debug('Loading model from: %s', model_path)
     xgb_model = xgb.Booster()
     xgb_model.load_model(model_path)
     return xgb_model
@@ -290,7 +290,7 @@ def _predict(
         if any(input_df['fraction_supported'] != 1.0)
         else 'raw'
     )
-    logger.info('Predicting dataset (%s): %s', filter_str, dataset)
+    logger.debug('Predicting dataset (%s): %s', filter_str, dataset)
     features, feature_cols, label_col = extract_model_features(input_df, {'default': split_fn})
     # note: dataset name is already stored in the 'appName' field
     try:
@@ -345,7 +345,7 @@ def _read_dataset_scores(
             nan_df['model'] + '/' + nan_df['platform'] + '/' + nan_df['dataset']
         )
         keys = list(nan_df['key'].unique())
-        logger.warning('Dropped rows w/ NaN values from: %s: %s', eval_dir, keys)
+        logger.debug('Dropped rows w/ NaN values from: %s: %s', eval_dir, keys)

     return df

@@ -395,7 +395,7 @@ def _read_platform_scores(
             nan_df['model'] + '/' + nan_df['platform'] + '/' + nan_df['dataset']
         )
         keys = list(nan_df['key'].unique())
-        logger.warning('Dropped rows w/ NaN values from: %s: %s', eval_dir, keys)
+        logger.debug('Dropped rows w/ NaN values from: %s: %s', eval_dir, keys)

     # compute accuracy by platform
     scores = {}
@@ -507,7 +507,7 @@ def train(
     for ds_name, ds_meta in datasets.items():
         if 'split_function' in ds_meta:
             plugin_path = ds_meta['split_function']
-            logger.info('Using split function for %s dataset from plugin: %s', ds_name, plugin_path)
+            logger.debug('Using split function for %s dataset from plugin: %s', ds_name, plugin_path)
             plugin = load_plugin(plugin_path)
             split_functions[ds_name] = plugin.split_function

@@ -613,7 +613,7 @@ def predict(
         'platform': platform,
     }

-    logger.info('Loading dataset: %s', dataset_name)
+    logger.debug('Loading dataset: %s', dataset_name)
     profile_df = load_profiles(
         datasets=datasets,
         node_level_supp=node_level_supp,
@@ -655,7 +655,7 @@ def predict(
         if node_level_supp is not None and any(profile_df['fraction_supported'] != 1.0)
         else 'raw'
     )
-    logger.info('Predicting dataset (%s): %s', filter_str, dataset_name)
+    logger.debug('Predicting dataset (%s): %s', filter_str, dataset_name)

     try:
         features_list = []
@@ -684,17 +684,17 @@ def predict(
     if output_info:
         # save features for troubleshooting
         output_file = output_info['features']['path']
-        logger.info('Writing features to: %s', output_file)
+        logger.debug('Writing features to: %s', output_file)
         features.to_csv(output_file, index=False)

         feature_importance, shapley_values = compute_shapley_values(xgb_model, features)

         output_file = output_info['featureImportance']['path']
-        logger.info('Writing shapley feature importances to: %s', output_file)
+        logger.debug('Writing shapley feature importances to: %s', output_file)
         feature_importance.to_csv(output_file)

         output_file = output_info['shapValues']['path']
-        logger.info('Writing shapley values to: %s', output_file)
+        logger.debug('Writing shapley values to: %s', output_file)
         shapley_values.to_csv(output_file, index=False)

         # compute per-app speedups
@@ -853,10 +853,10 @@ def evaluate(
         plugin = load_plugin(plugin_path)
         split_fn = plugin.split_function

-    logger.info('Loading qualification tool CSV files.')
+    logger.debug('Loading qualification tool CSV files.')
     node_level_supp, qual_tool_output, _ = _get_qual_data(qual_dir)

-    logger.info('Loading profiler tool CSV files.')
+    logger.debug('Loading profiler tool CSV files.')
     profile_df = load_profiles(datasets, profile_dir) # w/ GPU rows
     filtered_profile_df = load_profiles(
         datasets, profile_dir, node_level_supp, qual_tool_filter, qual_tool_output

user_tools/src/spark_rapids_tools/tools/qualx/util.py (+4 -4)

@@ -364,8 +364,8 @@ def run_command(command: str) -> subprocess.CompletedProcess:
     try:
         result = future.result()
         logger.debug('Command completed: %s', command)
-        logger.info(result.stdout)
-        logger.info(result.stderr)
+        logger.debug(result.stdout)
+        logger.debug(result.stderr)
     except Exception as e: # pylint: disable=broad-except
         logger.error('Command failed: %s', command)
         logger.error(e)
@@ -435,15 +435,15 @@ def write_csv_reports(per_sql: pd.DataFrame, per_app: pd.DataFrame, output_info:
     try:
         if per_sql is not None:
             sql_predictions_path = output_info['perSql']['path']
-            logger.info('Writing per-SQL predictions to: %s', sql_predictions_path)
+            logger.debug('Writing per-SQL predictions to: %s', sql_predictions_path)
             per_sql.to_csv(sql_predictions_path)
     except Exception as e: # pylint: disable=broad-except
         logger.error('Error writing per-SQL predictions. Reason: %s', e)

     try:
         if per_app is not None:
             app_predictions_path = output_info['perApp']['path']
-            logger.info('Writing per-application predictions to: %s', app_predictions_path)
+            logger.debug('Writing per-application predictions to: %s', app_predictions_path)
             per_app.to_csv(app_predictions_path)
     except Exception as e: # pylint: disable=broad-except
         logger.error('Error writing per-app predictions. Reason: %s', e)
