feat: add a check for whether values in score_df are NaN (#756)

WinstonLiyt · web-flow · commit d9cc78098beb · 2025-04-04T22:44:55.000+08:00
* add a check for whether values in score_df are NaN

* fix CI

* change raise to assert
diff --git a/rdagent/components/coder/data_science/ensemble/eval_tests/ensemble_test.txt b/rdagent/components/coder/data_science/ensemble/eval_tests/ensemble_test.txt
@@ -128,5 +128,10 @@ assert model_set_in_scores == set({{model_names}}).union({"ensemble"}), (
 assert score_df.index.is_unique, "The scores dataframe has duplicate model names."
 assert score_df.columns.tolist() == ["{{metric_name}}"], f"The column names of the scores dataframe should be ['{{metric_name}}'], but is '{score_df.columns.tolist()}'"
 
+# Check for NaN values in score_df
+assert not score_df.isnull().values.any(), (
+    f"The scores dataframe contains NaN values at the following locations:\n{score_df[score_df.isnull().any(axis=1)]}"
+)
+
 
 print("Ensemble test end.")
diff --git a/rdagent/components/coder/data_science/pipeline/eval.py b/rdagent/components/coder/data_science/pipeline/eval.py
@@ -82,6 +82,12 @@ def evaluate(
                     score_check_text += f"\n[Error] The scores dataframe does not contain the correct column names.\nCorrect columns is: ['{self.scen.metric_name}']\nBut got: {score_df.columns.tolist()}"
                     score_ret_code = 1
 
+                # Check whether any value in the scores dataframe is NaN
+                if score_df.isnull().values.any():
+                    nan_locations = score_df[score_df.isnull().any(axis=1)]
+                    score_check_text += f"\n[Error] The scores dataframe contains NaN values at the following locations:\n{nan_locations}"
+                    score_ret_code = 1
+
             except Exception as e:
                 score_check_text += f"\n[Error] in checking the scores.csv file: {e}\nscores.csv's content:\n-----\n{score_fp.read_text()}\n-----"
                 score_ret_code = 1
diff --git a/rdagent/components/coder/data_science/workflow/eval.py b/rdagent/components/coder/data_science/workflow/eval.py
@@ -106,6 +106,12 @@ def evaluate(
                     score_check_text += f"\n[Error] The scores dataframe does not contain the correct column names.\nCorrect columns is: ['{self.scen.metric_name}']\nBut got: {score_df.columns.tolist()}"
                     score_ret_code = 1
 
+                # Check whether any value in the scores dataframe is NaN
+                if score_df.isnull().values.any():
+                    nan_locations = score_df[score_df.isnull().any(axis=1)]
+                    score_check_text += f"\n[Error] The scores dataframe contains NaN values at the following locations:\n{nan_locations}"
+                    score_ret_code = 1
+
             except Exception as e:
                 score_check_text += f"\n[Error] in checking the scores.csv file: {e}\nscores.csv's content:\n-----\n{score_fp.read_text()}\n-----"
                 score_ret_code = 1