Skip to content

Commit cf910df

Browse files
authored
fix eval api_key leak in metadata; fix llm config in run infer (#2998)
1 parent 692fe21 commit cf910df

File tree

16 files changed

+34
-15
lines changed

16 files changed

+34
-15
lines changed

evaluation/EDA/run_infer.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ def process_instance(
6262
reset_logger: bool = True,
6363
):
6464
# Create the agent
65-
agent = Agent.get_cls(metadata.agent_class)(llm=LLM(llm_config=metadata.llm_config))
65+
agent = Agent.get_cls(metadata.agent_class)(llm=LLM(config=metadata.llm_config))
6666
# Setup the logger properly, so you can run multi-processing to parallelize the evaluation
6767
eval_output_dir = metadata.eval_output_dir
6868
if reset_logger:

evaluation/agent_bench/run_infer.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def process_instance(
3737
reset_logger: bool = True,
3838
):
3939
# Create the agent
40-
agent = Agent.get_cls(metadata.agent_class)(llm=LLM(llm_config=metadata.llm_config))
40+
agent = Agent.get_cls(metadata.agent_class)(llm=LLM(config=metadata.llm_config))
4141

4242
inst_id = instance.instance_id
4343
question = instance.description

evaluation/biocoder/run_infer.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ def process_instance(
8787
reset_logger: bool = True,
8888
):
8989
# Create the agent
90-
agent = Agent.get_cls(metadata.agent_class)(llm=LLM(llm_config=metadata.llm_config))
90+
agent = Agent.get_cls(metadata.agent_class)(llm=LLM(config=metadata.llm_config))
9191
instance = BiocoderData(**instance)
9292
print(instance)
9393
workspace_dir_name = (

evaluation/bird/run_infer.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ def process_instance(
126126
reset_logger: bool = True,
127127
):
128128
# Create the agent
129-
agent = Agent.get_cls(metadata.agent_class)(llm=LLM(llm_config=metadata.llm_config))
129+
agent = Agent.get_cls(metadata.agent_class)(llm=LLM(config=metadata.llm_config))
130130
workspace_mount_path = os.path.join(
131131
config.workspace_mount_path, 'bird_eval_workspace'
132132
)

evaluation/browsing_delegation/run_infer.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def process_instance(
3131
reset_logger: bool = True,
3232
):
3333
# Create the agent
34-
agent = Agent.get_cls(metadata.agent_class)(llm=LLM(llm_config=metadata.llm_config))
34+
agent = Agent.get_cls(metadata.agent_class)(llm=LLM(config=metadata.llm_config))
3535
env_id = instance.instance_id
3636
# Setup the logger properly, so you can run multi-processing to parallelize the evaluation
3737
if reset_logger:

evaluation/gaia/run_infer.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def process_instance(
4848
reset_logger: bool = True,
4949
):
5050
# Create the agent
51-
agent = Agent.get_cls(metadata.agent_class)(llm=LLM(llm_config=metadata.llm_config))
51+
agent = Agent.get_cls(metadata.agent_class)(llm=LLM(config=metadata.llm_config))
5252
# create process-specific workspace dir
5353
# we will create a workspace directory for EACH process
5454
# so that different agent don't interfere with each other.

evaluation/gpqa/run_infer.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ def process_instance(
120120
reset_logger: bool = True,
121121
):
122122
# Create the agent
123-
agent = Agent.get_cls(metadata.agent_class)(llm=LLM(llm_config=metadata.llm_config))
123+
agent = Agent.get_cls(metadata.agent_class)(llm=LLM(config=metadata.llm_config))
124124
old_workspace_mount_path = config.workspace_mount_path
125125
old_workspace_base = config.workspace_base
126126
try:

evaluation/humanevalfix/run_infer.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ def process_instance(
108108
reset_logger: bool = True,
109109
):
110110
# Create the agent
111-
agent = Agent.get_cls(metadata.agent_class)(llm=LLM(llm_config=metadata.llm_config))
111+
agent = Agent.get_cls(metadata.agent_class)(llm=LLM(config=metadata.llm_config))
112112
old_workspace_mount_path = config.workspace_mount_path
113113
old_workspace_base = config.workspace_base
114114

evaluation/logic_reasoning/run_infer.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ def process_instance(
103103
reset_logger: bool = True,
104104
):
105105
# Create the agent
106-
agent = Agent.get_cls(metadata.agent_class)(llm=LLM(llm_config=metadata.llm_config))
106+
agent = Agent.get_cls(metadata.agent_class)(llm=LLM(config=metadata.llm_config))
107107
old_workspace_mount_path = config.workspace_mount_path
108108
old_workspace_base = config.workspace_base
109109

evaluation/miniwob/run_infer.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def process_instance(
4141
reset_logger: bool = True,
4242
):
4343
# Create the agent
44-
agent = Agent.get_cls(metadata.agent_class)(llm=LLM(llm_config=metadata.llm_config))
44+
agent = Agent.get_cls(metadata.agent_class)(llm=LLM(config=metadata.llm_config))
4545
env_id = instance.id
4646
# Setup the logger properly, so you can run multi-processing to parallelize the evaluation
4747
if reset_logger:

evaluation/ml_bench/run_infer.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@
6767

6868

6969
def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool = True):
70-
agent = Agent.get_cls(metadata.agent_class)(llm=LLM(llm_config=metadata.llm_config))
70+
agent = Agent.get_cls(metadata.agent_class)(llm=LLM(config=metadata.llm_config))
7171
old_workspace_mount_path = config.workspace_mount_path
7272
old_workspace_base = config.workspace_base
7373
try:

evaluation/swe_bench/run_infer.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ def process_instance(
172172
reset_logger: bool = True,
173173
):
174174
# Create the agent
175-
agent = Agent.get_cls(metadata.agent_class)(llm=LLM(llm_config=metadata.llm_config))
175+
agent = Agent.get_cls(metadata.agent_class)(llm=LLM(config=metadata.llm_config))
176176

177177
workspace_mount_path = os.path.join(config.workspace_mount_path, '_eval_workspace')
178178
# create process-specific workspace dir

evaluation/toolqa/run_infer.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535

3636

3737
def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool = True):
38-
agent = Agent.get_cls(metadata.agent_class)(llm=LLM(llm_config=metadata.llm_config))
38+
agent = Agent.get_cls(metadata.agent_class)(llm=LLM(config=metadata.llm_config))
3939
# create process-specific workspace dir
4040
# we will create a workspace directory for EACH process
4141
# so that different agent don't interfere with each other.

evaluation/utils/shared.py

+8
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,14 @@ class EvalMetadata(BaseModel):
2929
data_split: str | None = None
3030
details: dict[str, Any] | None = None
3131

32+
def model_dump_json(self, *args, **kwargs):
    """Serialize the metadata to a JSON string with LLM credentials masked.

    The model is first dumped by the parent implementation (all positional
    and keyword arguments are forwarded to it), then the ``llm_config``
    entry is replaced with ``self.llm_config.to_safe_dict()`` before the
    result is re-encoded with ``json.dumps``.

    Note that the final string comes from a plain ``json.dumps`` call, so
    formatting options passed in ``kwargs`` only affect the intermediate
    dump, not the returned text.

    Returns:
        A JSON string of the metadata in which ``llm_config`` holds the
        masked configuration.
    """
    dumped = super().model_dump_json(*args, **kwargs)
    dumped_dict = json.loads(dumped)
    # Mask the credentials BEFORE anything is logged or returned; logging
    # the raw dump first would write the secrets to the debug log.
    dumped_dict['llm_config'] = self.llm_config.to_safe_dict()
    logger.debug(f'Dumped metadata: {dumped_dict}')
    return json.dumps(dumped_dict)
39+
3240

3341
def codeact_user_response(
3442
state: State,

evaluation/webarena/run_infer.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def process_instance(
4242
reset_logger: bool = True,
4343
):
4444
# Create the agent
45-
agent = Agent.get_cls(metadata.agent_class)(llm=LLM(llm_config=metadata.llm_config))
45+
agent = Agent.get_cls(metadata.agent_class)(llm=LLM(config=metadata.llm_config))
4646
env_id = instance.id
4747
# Setup the logger properly, so you can run multi-processing to parallelize the evaluation
4848
if reset_logger:

opendevin/core/config.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717
load_dotenv()
1818

1919

20+
# Names of LLMConfig fields that hold credentials. Code that prints or exports
# the config checks field names against this list and masks matching values.
LLM_SENSITIVE_FIELDS = ['api_key', 'aws_access_key_id', 'aws_secret_access_key']
21+
22+
2023
@dataclass
2124
class LLMConfig:
2225
"""Configuration for the LLM model.
@@ -86,7 +89,7 @@ def __str__(self):
8689
attr_name = f.name
8790
attr_value = getattr(self, f.name)
8891

89-
if attr_name in ['api_key', 'aws_access_key_id', 'aws_secret_access_key']:
92+
if attr_name in LLM_SENSITIVE_FIELDS:
9093
attr_value = '******' if attr_value else None
9194

9295
attr_str.append(f'{attr_name}={repr(attr_value)}')
@@ -96,6 +99,14 @@ def __str__(self):
9699
def __repr__(self):
97100
return self.__str__()
98101

102+
def to_safe_dict(self):
    """Build a copy of this config's attributes with credentials masked.

    Every attribute whose name is listed in ``LLM_SENSITIVE_FIELDS`` is
    replaced by ``'******'`` when it has a truthy value and by ``None``
    otherwise; all other attributes are copied through unchanged.
    """
    return {
        name: ('******' if value else None)
        if name in LLM_SENSITIVE_FIELDS
        else value
        for name, value in self.__dict__.items()
    }
109+
99110

100111
@dataclass
101112
class AgentConfig:

0 commit comments

Comments
 (0)