36
36
)
37
37
from openhands .core .logger import openhands_logger as logger
38
38
from openhands .core .main import create_runtime , run_controller
39
- from openhands .events .action import CmdRunAction , MessageAction , FileReadAction
39
+ from openhands .events .action import CmdRunAction , FileReadAction , MessageAction
40
40
from openhands .events .observation import CmdOutputObservation , ErrorObservation
41
41
from openhands .events .serialization .event import event_to_dict
42
42
from openhands .runtime .base import Runtime
43
43
from openhands .utils .async_utils import call_async_from_sync
44
44
from openhands .utils .shutdown_listener import sleep_if_should_continue
45
- import pdb
46
45
47
46
USE_HINT_TEXT = os .environ .get ('USE_HINT_TEXT' , 'false' ).lower () == 'true'
48
47
USE_INSTANCE_IMAGE = os .environ .get ('USE_INSTANCE_IMAGE' , 'true' ).lower () == 'true'
51
50
# TODO: migrate all swe-bench docker to ghcr.io/openhands
52
51
# TODO: adapt to all languages
53
52
DOCKER_IMAGE_PREFIX = os .environ .get ('EVAL_DOCKER_IMAGE_PREFIX' , '' )
54
- LANGUAGE = os .environ .get ('LANGUAGE' , 'python' )
53
+ LANGUAGE = os .environ .get ('LANGUAGE' , 'python' )
55
54
logger .info (f'Using docker image prefix: { DOCKER_IMAGE_PREFIX } ' )
56
55
57
56
@@ -71,7 +70,7 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata):
71
70
# Instruction based on Anthropic's official trajectory
72
71
# https://github.com/eschluntz/swe-bench-experiments/tree/main/evaluation/verified/20241022_tools_claude-3-5-sonnet-updated/trajs
73
72
instructions = {
74
- " python" : (
73
+ ' python' : (
75
74
'<uploaded_files>\n '
76
75
f'/workspace/{ workspace_dir_name } \n '
77
76
'</uploaded_files>\n '
@@ -96,7 +95,7 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata):
96
95
' Make sure all these tests pass with your changes.\n '
97
96
"Your thinking should be thorough and so it's fine if it's very long.\n "
98
97
),
99
- " java" : (
98
+ ' java' : (
100
99
'<uploaded_files>\n '
101
100
f'/workspace/{ workspace_dir_name } \n '
102
101
'</uploaded_files>\n '
@@ -121,7 +120,7 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata):
121
120
" Make sure all these tests pass with your changes.\n "
122
121
"Your thinking should be thorough and so it's fine if it's very long.\n "
123
122
),
124
- "go" : (
123
+ 'go' : (
125
124
'<uploaded_files>\n '
126
125
f'/workspace/{ workspace_dir_name } \n '
127
126
'</uploaded_files>\n '
@@ -146,7 +145,7 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata):
146
145
' Make sure all these tests pass with your changes.\n '
147
146
"Your thinking should be thorough and so it's fine if it's very long.\n "
148
147
),
149
- "c" : (
148
+ 'c' : (
150
149
'<uploaded_files>\n '
151
150
f'/workspace/{ workspace_dir_name } \n '
152
151
'</uploaded_files>\n '
@@ -171,7 +170,7 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata):
171
170
' Make sure all these tests pass with your changes.\n '
172
171
"Your thinking should be thorough and so it's fine if it's very long.\n "
173
172
),
174
- " cpp" : (
173
+ ' cpp' : (
175
174
'<uploaded_files>\n '
176
175
f'/workspace/{ workspace_dir_name } \n '
177
176
'</uploaded_files>\n '
@@ -196,7 +195,7 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata):
196
195
' Make sure all these tests pass with your changes.\n '
197
196
"Your thinking should be thorough and so it's fine if it's very long.\n "
198
197
),
199
- " javascript" : (
198
+ ' javascript' : (
200
199
'<uploaded_files>\n '
201
200
f'/workspace/{ workspace_dir_name } \n '
202
201
'</uploaded_files>\n '
@@ -221,7 +220,7 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata):
221
220
' Make sure all these tests pass with your changes.\n '
222
221
"Your thinking should be thorough and so it's fine if it's very long.\n "
223
222
),
224
- " typescript" : (
223
+ ' typescript' : (
225
224
'<uploaded_files>\n '
226
225
f'/workspace/{ workspace_dir_name } \n '
227
226
'</uploaded_files>\n '
@@ -246,7 +245,7 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata):
246
245
' Make sure all these tests pass with your changes.\n '
247
246
"Your thinking should be thorough and so it's fine if it's very long.\n "
248
247
),
249
- " rust" : (
248
+ ' rust' : (
250
249
'<uploaded_files>\n '
251
250
f'/workspace/{ workspace_dir_name } \n '
252
251
'</uploaded_files>\n '
@@ -270,11 +269,10 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata):
270
269
' - The functions you changed\n '
271
270
' Make sure all these tests pass with your changes.\n '
272
271
"Your thinking should be thorough and so it's fine if it's very long.\n "
273
- )
272
+ ),
274
273
}
275
274
instruction = instructions .get (LANGUAGE .lower ())
276
275
277
-
278
276
if instruction and RUN_WITH_BROWSING :
279
277
instruction += (
280
278
'<IMPORTANT!>\n '
@@ -284,7 +282,6 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata):
284
282
return instruction
285
283
286
284
287
-
288
285
# TODO: adapt to all languages
289
286
# def get_instance_docker_image(instance_id: str) -> str:
290
287
# image_name = 'sweb.eval.x86_64.' + instance_id
@@ -307,16 +304,15 @@ def get_instance_docker_image(instance: pd.Series):
307
304
container_name = container_name .replace ('/' , '_m_' )
308
305
instance_id = instance .get ('instance_id' , '' )
309
306
tag_suffix = instance_id .split ('-' )[- 1 ] if instance_id else ''
310
- container_tag = f" pr-{ tag_suffix } "
307
+ container_tag = f' pr-{ tag_suffix } '
311
308
# pdb.set_trace()
312
- return f" mswebench/{ container_name } :{ container_tag } "
309
+ return f' mswebench/{ container_name } :{ container_tag } '
313
310
# return "kong/insomnia:pr-8284"
314
311
# return "'sweb.eval.x86_64.local_insomnia"
315
312
# return "local_insomnia_why"
316
313
# return "local/kong-insomnia:pr-8117"
317
314
318
315
319
-
320
316
def get_config (
321
317
instance : pd .Series ,
322
318
metadata : EvalMetadata ,
@@ -569,7 +565,6 @@ def complete_runtime(
569
565
f'Failed to git config --global core.pager "": { str (obs )} ' ,
570
566
)
571
567
572
-
573
568
action = CmdRunAction (command = 'git add -A' )
574
569
action .set_hard_timeout (600 )
575
570
logger .info (action , extra = {'msg_type' : 'ACTION' })
@@ -582,14 +577,14 @@ def complete_runtime(
582
577
583
578
## Remove binary files
584
579
action = CmdRunAction (
585
- command = f'''
580
+ command = """
586
581
for file in $(git status --porcelain | grep -E "^(M| M|\\ ?\\ ?|A| A)" | cut -c4-); do
587
582
if [ -f "$file" ] && (file "$file" | grep -q "executable" || git check-attr binary "$file" | grep -q "binary: set"); then
588
583
git rm -f "$file" 2>/dev/null || rm -f "$file"
589
584
echo "Removed: $file"
590
585
fi
591
586
done
592
- '''
587
+ """
593
588
)
594
589
action .set_hard_timeout (600 )
595
590
logger .info (action , extra = {'msg_type' : 'ACTION' })
@@ -626,14 +621,12 @@ def complete_runtime(
626
621
else :
627
622
assert_and_raise (False , f'Unexpected observation type: { str (obs )} ' )
628
623
629
- action = FileReadAction (
630
- path = 'patch.diff'
631
- )
624
+ action = FileReadAction (path = 'patch.diff' )
632
625
action .set_hard_timeout (max (300 + 100 * n_retries , 600 ))
633
626
logger .info (action , extra = {'msg_type' : 'ACTION' })
634
627
obs = runtime .run_action (action )
635
628
git_patch = obs .content
636
- # pdb.set_trace()
629
+ # pdb.set_trace()
637
630
638
631
assert_and_raise (git_patch is not None , 'Failed to get git diff (None)' )
639
632
@@ -714,20 +707,21 @@ def remove_binary_diffs(patch_text):
714
707
is_binary_block = False
715
708
716
709
for line in lines :
717
- if line .startswith (" diff --git " ):
710
+ if line .startswith (' diff --git ' ):
718
711
if block and not is_binary_block :
719
712
cleaned_lines .extend (block )
720
713
block = [line ]
721
714
is_binary_block = False
722
- elif " Binary files" in line :
715
+ elif ' Binary files' in line :
723
716
is_binary_block = True
724
717
block .append (line )
725
718
else :
726
719
block .append (line )
727
720
728
721
if block and not is_binary_block :
729
722
cleaned_lines .extend (block )
730
- return "\n " .join (cleaned_lines )
723
+ return '\n ' .join (cleaned_lines )
724
+
731
725
git_patch = remove_binary_diffs (git_patch )
732
726
test_result = {
733
727
'git_patch' : git_patch ,
@@ -797,7 +791,7 @@ def filter_dataset(dataset: pd.DataFrame, filter_column: str) -> pd.DataFrame:
797
791
# so we don't need to manage file uploading to OpenHands's repo
798
792
# dataset = load_dataset(args.dataset, split=args.split)
799
793
# dataset = load_dataset(args.dataset)
800
- dataset = load_dataset (" json" , data_files = args .dataset )
794
+ dataset = load_dataset (' json' , data_files = args .dataset )
801
795
dataset = dataset [args .split ]
802
796
swe_bench_tests = filter_dataset (dataset .to_pandas (), 'instance_id' )
803
797
logger .info (
0 commit comments