feat: add naive experiment generator and update proposal configurations #759

Merged: 3 commits, Apr 7, 2025
2 changes: 2 additions & 0 deletions rdagent/app/data_science/conf.py

```diff
@@ -24,8 +24,10 @@ class DataScienceBasePropSetting(KaggleBasePropSetting):
     #### enable specification
     spec_enabled: bool = True

+    ### proposal related
     proposal_version: str = "v1"
     coder_on_whole_pipeline: bool = False
+    max_trace_hist: int = 3

     coder_max_loop: int = 10
     runner_max_loop: int = 3
```
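For context on how these fields behave: DataScienceBasePropSetting builds on pydantic-settings, so each attribute (including the newly centralized `max_trace_hist`) can be overridden through environment variables instead of constructor arguments. Below is a minimal stand-alone sketch of that mechanism; the demo class and the `DS_` prefix are illustrative assumptions, not RD-Agent's actual configuration.

```python
# Minimal sketch; the class name and DS_ env prefix are assumptions for the demo.
from pydantic_settings import BaseSettings

class DemoPropSetting(BaseSettings):
    model_config = {"env_prefix": "DS_"}  # assumed prefix, for illustration only

    proposal_version: str = "v1"
    coder_on_whole_pipeline: bool = False
    max_trace_hist: int = 3  # now a setting rather than a constructor argument

settings = DemoPropSetting()
print(settings.max_trace_hist)  # 3, or 5 if DS_MAX_TRACE_HIST=5 is exported
```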
3 changes: 2 additions & 1 deletion rdagent/components/coder/data_science/pipeline/exp.py

```diff
@@ -3,4 +3,5 @@

 # Because we use isinstance to distinguish between different types of tasks, we need to use sub classes to represent different types of tasks
 class PipelineTask(CoSTEERTask):
-    pass
+    def __init__(self, name: str = "Pipeline", *args, **kwargs) -> None:
+        super().__init__(name=name, *args, **kwargs)
```
3 changes: 2 additions & 1 deletion rdagent/components/coder/data_science/workflow/exp.py

```diff
@@ -10,4 +10,5 @@

 # Because we use isinstance to distinguish between different types of tasks, we need to use sub classes to represent different types of tasks
 class WorkflowTask(CoSTEERTask):
-    pass
+    def __init__(self, name: str = "Workflow", *args, **kwargs) -> None:
+        super().__init__(name=name, *args, **kwargs)
```
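The identical change in both files gives each task subclass a default `name`, so a task can be constructed from LLM-generated JSON that omits the field, while `isinstance` checks still distinguish task types. A self-contained sketch of the pattern (`BaseTask` is a stand-in; the real CoSTEERTask signature may differ):

```python
# Stand-in base class; the real CoSTEERTask signature may differ.
class BaseTask:
    def __init__(self, name: str, description: str = "") -> None:
        self.name = name
        self.description = description

class PipelineTask(BaseTask):
    # Default the name so JSON payloads without a "name" key still construct.
    def __init__(self, name: str = "Pipeline", *args, **kwargs) -> None:
        super().__init__(name=name, *args, **kwargs)

task = PipelineTask(description="train an end-to-end model")
print(task.name)                       # "Pipeline"
print(isinstance(task, PipelineTask))  # True: how task types are dispatched
```

Note that `super().__init__(name=name, *args, **kwargs)` only works when no extra positional arguments are passed (a positional would collide with the `name` keyword), so callers are expected to use keywords.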
2 changes: 1 addition & 1 deletion rdagent/core/conf.py

```diff
@@ -39,7 +39,7 @@ def base_iter(settings_cls: type[ExtendedBaseSettings]) -> list[type[ExtendedBaseSettings]]:
                 env_prefix=base_cls.model_config.get("env_prefix"),
                 env_nested_delimiter=base_cls.model_config.get("env_nested_delimiter"),
             )
-            for base_cls in base_iter(cast(type[ExtendedBaseSettings], settings_cls))
+            for base_cls in base_iter(cast("type[ExtendedBaseSettings]", settings_cls))
         ]
         return init_settings, env_settings, *parent_env_settings, dotenv_settings, file_secret_settings
```
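The only change here is quoting the first argument of `typing.cast`. Both spellings are equivalent for type checkers; the string form avoids evaluating the subscripted type expression at runtime. A quick illustration:

```python
from typing import cast

def first_item(items: list[object]) -> int:
    # cast performs no runtime conversion; it only informs the type checker.
    return cast("int", items[0])  # same meaning as cast(int, items[0])

print(first_item([41]) + 1)  # 42
```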
2 changes: 1 addition & 1 deletion rdagent/core/experiment.py

```diff
@@ -10,7 +10,7 @@

 from collections.abc import Sequence
 from copy import deepcopy
 from pathlib import Path
-from typing import Any, Generic, Literal, TypeVar
+from typing import Any, Generic, TypeVar

 from rdagent.core.conf import RD_AGENT_SETTINGS
 from rdagent.core.evaluation import Feedback
```
2 changes: 1 addition & 1 deletion rdagent/core/utils.py

```diff
@@ -69,7 +69,7 @@ def similarity(text1: str, text2: str) -> int:
     text2 = text2 if isinstance(text2, str) else ""

     # Maybe we can use other similarity algorithm such as tfidf
-    return cast(int, fuzz.ratio(text1, text2))  # mypy does not regard it as int
+    return cast("int", fuzz.ratio(text1, text2))  # mypy does not regard it as int


 def import_class(class_path: str) -> Any:
```
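For reference, `fuzz.ratio` computes a Levenshtein-style similarity on a 0-100 scale; the cast exists because the library's stubs may not declare an `int` return. A usage sketch, assuming a rapidfuzz/thefuzz-style `fuzz` module comparable to the one imported in this file:

```python
from rapidfuzz import fuzz  # assumption: any thefuzz/rapidfuzz-style module works here

# Edit-distance-based similarity score on a 0-100 scale.
print(fuzz.ratio("gradient boosting", "gradient boosted"))  # high score, ~85
```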
30 changes: 14 additions & 16 deletions rdagent/scenarios/data_science/proposal/exp_gen/__init__.py

```diff
@@ -1,5 +1,6 @@
 from rdagent.app.data_science.conf import DS_RD_SETTING
 from rdagent.core.proposal import ExpGen
+from rdagent.core.utils import import_class
 from rdagent.scenarios.data_science.experiment.experiment import DSExperiment
 from rdagent.scenarios.data_science.proposal.exp_gen.base import DSTrace
 from rdagent.scenarios.data_science.proposal.exp_gen.draft import DSDraftExpGen
@@ -11,32 +12,29 @@


 class DSExpGen(ExpGen):
-    """Data Science Task Generator."""
+    """
+    Data Science Task Generator.
+    This is a experiment router generator;
+    """

-    def __init__(self, scen: DataScienceScen, max_trace_hist: int = 3) -> None:
-        self.max_trace_hist = max_trace_hist  # max number of historical trace to know when propose new experiment
+    def __init__(self, scen: DataScienceScen) -> None:
         super().__init__(scen)

     def gen(self, trace: DSTrace) -> DSExperiment:
+        if DS_RD_SETTING.proposal_version not in ["v1", "v2"]:
+            return import_class(DS_RD_SETTING.proposal_version)(scen=self.scen).gen(trace=trace)
+
         if DS_RD_SETTING.coder_on_whole_pipeline:
-            return DSProposalV2ExpGen(scen=self.scen).gen(
-                trace=trace,
-                max_trace_hist=self.max_trace_hist,
-                pipeline=True,
-            )
+            return DSProposalV2ExpGen(scen=self.scen).gen(trace=trace, pipeline=True)

         next_missing_component = trace.next_incomplete_component()
         if next_missing_component is not None:
             return DSDraftExpGen(scen=self.scen).gen(
                 component=next_missing_component,
                 trace=trace,
             )
         if DS_RD_SETTING.proposal_version == "v1":
-            return DSProposalV1ExpGen(scen=self.scen).gen(
-                trace=trace,
-                max_trace_hist=self.max_trace_hist,
-            )
+            return DSProposalV1ExpGen(scen=self.scen).gen(trace=trace)
         if DS_RD_SETTING.proposal_version == "v2":
-            return DSProposalV2ExpGen(scen=self.scen).gen(
-                trace=trace,
-                max_trace_hist=self.max_trace_hist,
-            )
+            return DSProposalV2ExpGen(scen=self.scen).gen(trace=trace)
```
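The router now falls through to dynamic loading: any `proposal_version` other than "v1"/"v2" is interpreted as a dotted class path, which is how the new NaiveExpGen can be plugged in without touching this file. A minimal sketch of what an `import_class`-style helper does (the real one lives in `rdagent.core.utils` and may differ in detail):

```python
import importlib

def import_class(class_path: str):
    # "pkg.module.ClassName" -> the ClassName object from pkg.module.
    module_path, class_name = class_path.rsplit(".", 1)
    return getattr(importlib.import_module(module_path), class_name)

print(import_class("collections.OrderedDict"))  # <class 'collections.OrderedDict'>
# proposal_version could then be set to e.g.
# "rdagent.scenarios.data_science.proposal.exp_gen.naive.NaiveExpGen"
```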
64 changes: 64 additions & 0 deletions rdagent/scenarios/data_science/proposal/exp_gen/naive.py (new file)

```python
"""
The most naive way to design experiments
"""

from rdagent.app.data_science.conf import DS_RD_SETTING
from rdagent.components.coder.data_science.pipeline.exp import PipelineTask
from rdagent.core.proposal import ExpGen
from rdagent.scenarios.data_science.experiment.experiment import DSExperiment
from rdagent.scenarios.data_science.proposal.exp_gen.base import DSHypothesis, DSTrace
from rdagent.utils.agent.tpl import T
from rdagent.utils.agent.workflow import build_cls_from_json_with_retry


class NaiveExpGen(ExpGen):
    def gen(self, trace: DSTrace) -> DSExperiment:
        sota_exp = trace.sota_experiment()
        scenario_desc = trace.scen.get_scenario_all_desc()
        competition_desc = trace.scen.get_competition_full_desc()
        sota_exp_desc = T("scenarios.data_science.share:describe.exp").r(
            exp=sota_exp, heading="Best of previous exploration of the scenario"
        )

        sota_exp_feedback_list = trace.experiment_and_feedback_list_after_init(return_type="sota")
        failed_exp_feedback_list = trace.experiment_and_feedback_list_after_init(return_type="failed")[
            -DS_RD_SETTING.max_trace_hist :
        ]

        sota_exp_and_feedback_list_desc = T("scenarios.data_science.share:describe.trace").r(
            exp_and_feedback_list=sota_exp_feedback_list,
            success=True,
        )
        failed_exp_and_feedback_list_desc = T("scenarios.data_science.share:describe.trace").r(
            exp_and_feedback_list=failed_exp_feedback_list,
            success=False,
        )

        sys_prompt = T(".naive:naive_gen.system").r()

        user_prompt = T(".naive:naive_gen.user").r(
            competition_desc=competition_desc,
            sota_exp_desc=sota_exp_desc,
            scenario_desc=scenario_desc,
            sota_exp_and_feedback_list_desc=sota_exp_and_feedback_list_desc,
            failed_exp_and_feedback_list_desc=failed_exp_and_feedback_list_desc,
        )

        task = build_cls_from_json_with_retry(
            cls=PipelineTask,
            system_prompt=sys_prompt,
            user_prompt=user_prompt,
            retry_n=5,
        )

        exp = DSExperiment(
            pending_tasks_list=[[task]],
            hypothesis=DSHypothesis(
                component="Pipeline",
                hypothesis=task.description,
            ),
        )

        if sota_exp is not None:
            exp.experiment_workspace.inject_code_from_file_dict(sota_exp.experiment_workspace)
        return exp
```
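The core of the generator is `build_cls_from_json_with_retry`, which asks the LLM for a JSON object and retries until it deserializes into the target class. A hedged sketch of that loop; the real helper lives in `rdagent.utils.agent.workflow`, and the prompting/backend details here are assumptions:

```python
import json

def call_llm(system_prompt: str, user_prompt: str) -> str:
    # Stand-in for the real LLM backend used by RD-Agent.
    return json.dumps({"name": "Pipeline", "description": "try a LightGBM baseline"})

def build_cls_from_json_with_retry(cls, system_prompt, user_prompt, retry_n=5):
    for _ in range(retry_n):
        raw = call_llm(system_prompt, user_prompt)
        try:
            return cls(**json.loads(raw))  # kwargs must match the class signature
        except (json.JSONDecodeError, TypeError):
            continue  # malformed output: ask again
    raise RuntimeError(f"no valid {cls.__name__} JSON after {retry_n} tries")
```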
37 changes: 37 additions & 0 deletions rdagent/scenarios/data_science/proposal/exp_gen/naive.yaml (new file)

```yaml
naive_gen:
  system: |-
    You are a Kaggle Grandmaster and expert ML engineer with deep expertise in statistics, machine learning, and competition optimization.
    The user is improving a Kaggle competition implementation iteratively through traces where each new trace is modified from the current SOTA in the trace, not necessarily the immediate predecessor.
    You will be given a competition scenario, previous SOTA(best) and failed experiments and feedbacks, the current SOTA implementation and feedback, and a list of identified problems.

    ## Guidelines
    Here are guidelines to aid your task design. You don't need to answer all the questions.
    1. Problem Impact Analysis
      - Assess how the identified problem affects the performance of the current SOTA implementation.
    2. Lessons from Previous Experiments
      - For persistent problem, analyze why previous experiments failed on this problem.
      - Review why previous experiments failed to address the problem. Identify patterns, overlooked factors, or misaligned assumptions.
      - Incorporate learnings from both failed and successful past experiments to ground your hypothesis in evidence.
    3. Actionable Changes
      - If the problem relates to time/memory constraints, suggest smaller model sizes or alternative algorithms with reduced complexity.
      - If the problem involves underperforming models, propose removing or replacing models with significantly worse performance.
      - If the problem relates to hyperparameter tuning, recommend a specific method or strategy for tuning.

    ## Final Output Format in JSON Schema:
    {% include "scenarios.data_science.proposal.exp_gen.prompts:output_format.pipeline" %}

  user: |-
    # Scenario Description
    {{ scenario_desc }}

    # Competition Description
    {{ competition_desc }}

    # Previous Failed Experiments and Feedbacks:
    {{ failed_exp_and_feedback_list_desc }}

    # Previous SOTA Experiments and Feedbacks:
    {{ sota_exp_and_feedback_list_desc }}

    # Current SOTA Implementation
    {{ sota_exp_desc }}
```
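These prompts are Jinja2 templates: `T(".naive:naive_gen.user").r(...)` resolves the YAML path and renders the placeholders (note the `{% include %}` pulling in the shared pipeline output schema). A stand-alone illustration of the rendering step, with the template inline instead of loaded from YAML:

```python
import jinja2

# Inline stand-in for the "user" template above.
user_tpl = "# Competition Description\n{{ competition_desc }}"
rendered = jinja2.Template(user_tpl).render(competition_desc="Titanic survival prediction")
print(rendered)
```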
12 changes: 8 additions & 4 deletions rdagent/scenarios/data_science/proposal/exp_gen/proposal.py

```diff
@@ -58,7 +58,7 @@


 class DSProposalV1ExpGen(ExpGen):
-    def gen(self, trace: DSTrace, max_trace_hist: int) -> DSExperiment:
+    def gen(self, trace: DSTrace) -> DSExperiment:
         # Guidelines:
         # System prompts: Shared condition you are facing
         # - scenario description: `scenario_desc`
@@ -84,7 +84,9 @@ def gen(self, trace: DSTrace) -> DSExperiment:
         )  # we use file_dict for hitting the cache when replicate the experiment in another machine.

         sota_exp_feedback_list = trace.experiment_and_feedback_list_after_init(return_type="sota")
-        failed_exp_feedback_list = trace.experiment_and_feedback_list_after_init(return_type="failed")[-max_trace_hist:]
+        failed_exp_feedback_list = trace.experiment_and_feedback_list_after_init(return_type="failed")[
+            -DS_RD_SETTING.max_trace_hist :
+        ]
         all_exp_feedback_list = trace.experiment_and_feedback_list_after_init(return_type="all")
         trace_component_to_feedback_df = pd.DataFrame(columns=["component", "hypothesis", "decision"])
         for index, (exp, fb) in enumerate(all_exp_feedback_list):
@@ -414,7 +416,7 @@ def task_gen(
         exp.pending_tasks_list.append([workflow_task])
         return exp

-    def gen(self, trace: DSTrace, max_trace_hist: int, pipeline: bool = False) -> DSExperiment:
+    def gen(self, trace: DSTrace, pipeline: bool = False) -> DSExperiment:
         component_desc = "\n".join(
             [
                 f"[{key}] {value}"
@@ -431,7 +433,9 @@ def gen(self, trace: DSTrace, pipeline: bool = False) -> DSExperiment:
         )

         sota_exp_feedback_list = trace.experiment_and_feedback_list_after_init(return_type="sota")
-        failed_exp_feedback_list = trace.experiment_and_feedback_list_after_init(return_type="failed")[-max_trace_hist:]
+        failed_exp_feedback_list = trace.experiment_and_feedback_list_after_init(return_type="failed")[
+            -DS_RD_SETTING.max_trace_hist :
+        ]

         sota_exp_feedback_list_desc = T("scenarios.data_science.share:describe.trace").r(
             exp_and_feedback_list=sota_exp_feedback_list,
```
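Both call sites now read the history window from settings rather than a method parameter; the negative slice keeps only the most recent failures:

```python
failed = ["exp1", "exp2", "exp3", "exp4", "exp5"]
max_trace_hist = 3  # the DS_RD_SETTING.max_trace_hist default
print(failed[-max_trace_hist:])  # ['exp3', 'exp4', 'exp5']: last N only
```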
2 changes: 1 addition & 1 deletion rdagent/scenarios/data_science/share.yaml

```diff
@@ -286,4 +286,4 @@ component_spec:

     8. Submission File:
       - Save the final predictions as `submission.csv`, ensuring the format matches the competition requirements (refer to `sample_submission` in the Folder Description for the correct structure).
-      - Present the required submission format explicitly and ensure the output adheres to it.
\ No newline at end of file
+      - Present the required submission format explicitly and ensure the output adheres to it.
```