Skip to content

Commit 072d956

Browse files
Merge remote-tracking branch 'upstream/main'
2 parents dcdb448 + 1b6e444 commit 072d956

File tree

660 files changed

+37966
-21101
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

660 files changed

+37966
-21101
lines changed

.devcontainer/README.MD

Lines changed: 0 additions & 1 deletion
This file was deleted.

.devcontainer/devcontainer.json

Lines changed: 0 additions & 15 deletions
This file was deleted.

.devcontainer/on_create.sh

Lines changed: 0 additions & 6 deletions
This file was deleted.

.github/dependabot.yml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ updates:
1818
- "chromadb"
1919
browsergym:
2020
patterns:
21-
- "browsergym"
21+
- "browsergym*"
2222
security-all:
2323
applies-to: "security-updates"
2424
patterns:
@@ -70,3 +70,8 @@ updates:
7070
applies-to: "version-updates"
7171
patterns:
7272
- "*"
73+
74+
- package-ecosystem: "github-actions"
75+
directory: "/"
76+
schedule:
77+
interval: "weekly"
.github/scripts/check_version_consistency.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
#!/usr/bin/env python3
2+
import os
3+
import re
4+
import sys
5+
from typing import Set, Tuple
6+
7+
8+
def find_version_references(
    directory: str,
) -> Tuple[Set[Tuple[str, str]], Set[Tuple[str, str]]]:
    """Collect every ``openhands:X.YY`` and ``runtime:X.YY`` reference under a tree.

    Walks ``directory`` recursively and scans files whose names end in one of
    a fixed set of text-like extensions. The ``.git`` metadata directory is
    skipped; sibling directories whose names merely contain ``.git`` (such as
    ``.github``) are scanned like any other.

    Args:
        directory: Root of the tree to scan.

    Returns:
        A pair ``(openhands_versions, runtime_versions)``. Each set holds
        ``(major, minor)`` string tuples, because both patterns have two
        capture groups and ``findall`` returns one tuple per match.

    Files that cannot be opened or decoded as UTF-8 are reported on stderr
    and skipped, so a single bad file does not abort the scan.
    """
    openhands_versions: Set[Tuple[str, str]] = set()
    runtime_versions: Set[Tuple[str, str]] = set()

    version_pattern_openhands = re.compile(r'openhands:(\d{1})\.(\d{2})')
    version_pattern_runtime = re.compile(r'runtime:(\d{1})\.(\d{2})')
    scanned_suffixes = ('.md', '.yml', '.yaml', '.txt', '.html', '.py', '.js', '.ts')

    for root, dirs, files in os.walk(directory):
        # Skip the .git directory by exact name. Removing it from ``dirs``
        # in place stops os.walk from descending into it at all. A substring
        # test on ``root`` would also drop ``.github`` and every file of a
        # checkout whose absolute path happens to contain ".git".
        if '.git' in dirs:
            dirs.remove('.git')

        for file in files:
            if not file.endswith(scanned_suffixes):
                continue

            file_path = os.path.join(root, file)
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
            except (OSError, UnicodeError) as e:
                print(f'Error reading {file_path}: {e}', file=sys.stderr)
                continue

            # Find all openhands version references
            openhands_versions.update(version_pattern_openhands.findall(content))

            # Find all runtime version references
            runtime_versions.update(version_pattern_runtime.findall(content))

    return openhands_versions, runtime_versions
40+
41+
42+
def main():
    """Check that the tree references a single openhands and a single runtime version.

    The scan root is resolved two directory levels above the directory that
    contains this script. For each version family, more than one distinct
    version is an error (reported on stderr, exit status 1); no reference at
    all only produces a warning. The process always terminates through
    ``sys.exit``.
    """
    script_dir = os.path.dirname(__file__)
    repo_root = os.path.abspath(os.path.join(script_dir, '..', '..'))
    openhands_versions, runtime_versions = find_version_references(repo_root)

    # One row per version family: the versions found, the message for
    # conflicting versions, and the message for no versions at all.
    checks = (
        (
            openhands_versions,
            'Error: Multiple openhands versions found:',
            'Warning: No openhands version references found',
        ),
        (
            runtime_versions,
            'Error: Multiple runtime versions found:',
            'Warning: No runtime version references found',
        ),
    )

    exit_code = 0
    for versions, conflict_message, missing_message in checks:
        count = len(versions)
        if count > 1:
            print(conflict_message, file=sys.stderr)
            print('Found versions:', sorted(versions), file=sys.stderr)
            exit_code = 1
        elif count == 0:
            # Absence is reported but does not fail the check.
            print(missing_message, file=sys.stderr)

    sys.exit(exit_code)


# Run the check only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

.github/workflows/dummy-agent-test.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ jobs:
3636
- name: Set up Docker Buildx
3737
id: buildx
3838
uses: docker/setup-buildx-action@v3
39+
- name: Install tmux
40+
run: sudo apt-get update && sudo apt-get install -y tmux
3941
- name: Install poetry via pipx
4042
run: pipx install poetry
4143
- name: Set up Python

.github/workflows/eval-runner.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ jobs:
2929
- name: Checkout repository
3030
uses: actions/checkout@v4
3131

32+
- name: Install tmux
33+
run: sudo apt-get update && sudo apt-get install -y tmux
3234
- name: Install poetry via pipx
3335
run: pipx install poetry
3436

@@ -129,7 +131,7 @@ jobs:
129131
130132
- name: Post to a Slack channel
131133
id: slack
132-
uses: slackapi/slack-github-action@v1.27.0
134+
uses: slackapi/slack-github-action@v2.0.0
133135
with:
134136
channel-id: 'C07SVQSCR6F'
135137
slack-message: "*Evaluation Trigger:* ${{ github.event_name == 'pull_request' && format('Pull Request (eval-this label on PR #{0})', github.event.pull_request.number) || github.event_name == 'schedule' && 'Daily Schedule' || format('Manual Trigger: {0}', github.event.inputs.reason) }}\n\nLink to summary: [here](https://github.com/${{ github.repository }}/issues/${{ github.event_name == 'pull_request' && github.event.pull_request.number || 4504 }}#issuecomment-${{ steps.create_comment.outputs.comment-id }})"

.github/workflows/fe-unit-tests.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ jobs:
2424
runs-on: ubuntu-latest
2525
strategy:
2626
matrix:
27-
node-version: [20]
27+
node-version: [20, 22]
28+
fail-fast: true
2829
steps:
2930
- name: Checkout
3031
uses: actions/checkout@v4
@@ -42,6 +43,6 @@ jobs:
4243
working-directory: ./frontend
4344
run: npm run test:coverage
4445
- name: Upload coverage to Codecov
45-
uses: codecov/codecov-action@v4
46+
uses: codecov/codecov-action@v5
4647
env:
4748
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

.github/workflows/ghcr-build.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ jobs:
5656
docker-images: false
5757
swap-storage: true
5858
- name: Set up QEMU
59-
uses: docker/setup-qemu-action@v3.0.0
59+
uses: docker/setup-qemu-action@v3.3.0
6060
with:
6161
image: tonistiigi/binfmt:latest
6262
- name: Login to GHCR
@@ -119,7 +119,7 @@ jobs:
119119
docker-images: false
120120
swap-storage: true
121121
- name: Set up QEMU
122-
uses: docker/setup-qemu-action@v3.0.0
122+
uses: docker/setup-qemu-action@v3.3.0
123123
with:
124124
image: tonistiigi/binfmt:latest
125125
- name: Login to GHCR
@@ -293,7 +293,7 @@ jobs:
293293
RUN_AS_OPENHANDS=false \
294294
poetry run pytest -n 3 -raRs --reruns 2 --reruns-delay 5 --cov=openhands --cov-report=xml -s ./tests/runtime --ignore=tests/runtime/test_browsergym_envs.py
295295
- name: Upload coverage to Codecov
296-
uses: codecov/codecov-action@v4
296+
uses: codecov/codecov-action@v5
297297
env:
298298
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
299299

@@ -370,7 +370,7 @@ jobs:
370370
RUN_AS_OPENHANDS=true \
371371
poetry run pytest -n 3 -raRs --reruns 2 --reruns-delay 5 --cov=openhands --cov-report=xml -s ./tests/runtime --ignore=tests/runtime/test_browsergym_envs.py
372372
- name: Upload coverage to Codecov
373-
uses: codecov/codecov-action@v4
373+
uses: codecov/codecov-action@v5
374374
env:
375375
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
376376

.github/workflows/integration-runner.yml

Lines changed: 75 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ jobs:
5656
LLM_MODEL: "litellm_proxy/claude-3-5-haiku-20241022"
5757
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
5858
LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
59+
MAX_ITERATIONS: 10
5960
run: |
6061
echo "[llm.eval]" > config.toml
6162
echo "model = \"$LLM_MODEL\"" >> config.toml
@@ -70,7 +71,7 @@ jobs:
7071
env:
7172
SANDBOX_FORCE_REBUILD_RUNTIME: True
7273
run: |
73-
poetry run ./evaluation/integration_tests/scripts/run_infer.sh llm.eval HEAD CodeActAgent '' $N_PROCESSES '' 'haiku_run'
74+
poetry run ./evaluation/integration_tests/scripts/run_infer.sh llm.eval HEAD CodeActAgent '' 10 $N_PROCESSES '' 'haiku_run'
7475
7576
# get integration tests report
7677
REPORT_FILE_HAIKU=$(find evaluation/evaluation_outputs/outputs/integration_tests/CodeActAgent/*haiku*_maxiter_10_N* -name "report.md" -type f | head -n 1)
@@ -88,6 +89,7 @@ jobs:
8889
LLM_MODEL: "litellm_proxy/deepseek-chat"
8990
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
9091
LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
92+
MAX_ITERATIONS: 10
9193
run: |
9294
echo "[llm.eval]" > config.toml
9395
echo "model = \"$LLM_MODEL\"" >> config.toml
@@ -99,7 +101,7 @@ jobs:
99101
env:
100102
SANDBOX_FORCE_REBUILD_RUNTIME: True
101103
run: |
102-
poetry run ./evaluation/integration_tests/scripts/run_infer.sh llm.eval HEAD CodeActAgent '' $N_PROCESSES '' 'deepseek_run'
104+
poetry run ./evaluation/integration_tests/scripts/run_infer.sh llm.eval HEAD CodeActAgent '' 10 $N_PROCESSES '' 'deepseek_run'
103105
104106
# get integration tests report
105107
REPORT_FILE_DEEPSEEK=$(find evaluation/evaluation_outputs/outputs/integration_tests/CodeActAgent/deepseek*_maxiter_10_N* -name "report.md" -type f | head -n 1)
@@ -109,11 +111,75 @@ jobs:
109111
echo >> $GITHUB_ENV
110112
echo "EOF" >> $GITHUB_ENV
111113
114+
# -------------------------------------------------------------
115+
# Run DelegatorAgent tests for Haiku, limited to t01 and t02
116+
- name: Wait a little bit (again)
117+
run: sleep 5
118+
119+
- name: Configure config.toml for testing DelegatorAgent (Haiku)
120+
env:
121+
LLM_MODEL: "litellm_proxy/claude-3-5-haiku-20241022"
122+
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
123+
LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
124+
MAX_ITERATIONS: 30
125+
run: |
126+
echo "[llm.eval]" > config.toml
127+
echo "model = \"$LLM_MODEL\"" >> config.toml
128+
echo "api_key = \"$LLM_API_KEY\"" >> config.toml
129+
echo "base_url = \"$LLM_BASE_URL\"" >> config.toml
130+
echo "temperature = 0.0" >> config.toml
131+
132+
- name: Run integration test evaluation for DelegatorAgent (Haiku)
133+
env:
134+
SANDBOX_FORCE_REBUILD_RUNTIME: True
135+
run: |
136+
poetry run ./evaluation/integration_tests/scripts/run_infer.sh llm.eval HEAD DelegatorAgent '' 30 $N_PROCESSES "t01_fix_simple_typo,t02_add_bash_hello" 'delegator_haiku_run'
137+
138+
# Find and export the delegator test results
139+
REPORT_FILE_DELEGATOR_HAIKU=$(find evaluation/evaluation_outputs/outputs/integration_tests/DelegatorAgent/*haiku*_maxiter_30_N* -name "report.md" -type f | head -n 1)
140+
echo "REPORT_FILE_DELEGATOR_HAIKU: $REPORT_FILE_DELEGATOR_HAIKU"
141+
echo "INTEGRATION_TEST_REPORT_DELEGATOR_HAIKU<<EOF" >> $GITHUB_ENV
142+
cat $REPORT_FILE_DELEGATOR_HAIKU >> $GITHUB_ENV
143+
echo >> $GITHUB_ENV
144+
echo "EOF" >> $GITHUB_ENV
145+
146+
# -------------------------------------------------------------
147+
# Run DelegatorAgent tests for DeepSeek, limited to t01 and t02
148+
- name: Wait a little bit (again)
149+
run: sleep 5
150+
151+
- name: Configure config.toml for testing DelegatorAgent (DeepSeek)
152+
env:
153+
LLM_MODEL: "litellm_proxy/deepseek-chat"
154+
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
155+
LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
156+
MAX_ITERATIONS: 30
157+
run: |
158+
echo "[llm.eval]" > config.toml
159+
echo "model = \"$LLM_MODEL\"" >> config.toml
160+
echo "api_key = \"$LLM_API_KEY\"" >> config.toml
161+
echo "base_url = \"$LLM_BASE_URL\"" >> config.toml
162+
echo "temperature = 0.0" >> config.toml
163+
164+
- name: Run integration test evaluation for DelegatorAgent (DeepSeek)
165+
env:
166+
SANDBOX_FORCE_REBUILD_RUNTIME: True
167+
run: |
168+
poetry run ./evaluation/integration_tests/scripts/run_infer.sh llm.eval HEAD DelegatorAgent '' 30 $N_PROCESSES "t01_fix_simple_typo,t02_add_bash_hello" 'delegator_deepseek_run'
169+
170+
# Find and export the delegator test results
171+
REPORT_FILE_DELEGATOR_DEEPSEEK=$(find evaluation/evaluation_outputs/outputs/integration_tests/DelegatorAgent/deepseek*_maxiter_30_N* -name "report.md" -type f | head -n 1)
172+
echo "REPORT_FILE_DELEGATOR_DEEPSEEK: $REPORT_FILE_DELEGATOR_DEEPSEEK"
173+
echo "INTEGRATION_TEST_REPORT_DELEGATOR_DEEPSEEK<<EOF" >> $GITHUB_ENV
174+
cat $REPORT_FILE_DELEGATOR_DEEPSEEK >> $GITHUB_ENV
175+
echo >> $GITHUB_ENV
176+
echo "EOF" >> $GITHUB_ENV
177+
112178
- name: Create archive of evaluation outputs
113179
run: |
114180
TIMESTAMP=$(date +'%y-%m-%d-%H-%M')
115181
cd evaluation/evaluation_outputs/outputs # Change to the outputs directory
116-
tar -czvf ../../../integration_tests_${TIMESTAMP}.tar.gz integration_tests/CodeActAgent/* # Only include the actual result directories
182+
tar -czvf ../../../integration_tests_${TIMESTAMP}.tar.gz integration_tests/CodeActAgent/* integration_tests/DelegatorAgent/* # Only include the actual result directories
117183
118184
- name: Upload evaluation results as artifact
119185
uses: actions/upload-artifact@v4
@@ -154,5 +220,11 @@ jobs:
154220
**Integration Tests Report (DeepSeek)**
155221
DeepSeek LLM Test Results:
156222
${{ env.INTEGRATION_TEST_REPORT_DEEPSEEK }}
223+
---
224+
**Integration Tests Report Delegator (Haiku)**
225+
${{ env.INTEGRATION_TEST_REPORT_DELEGATOR_HAIKU }}
226+
---
227+
**Integration Tests Report Delegator (DeepSeek)**
228+
${{ env.INTEGRATION_TEST_REPORT_DELEGATOR_DEEPSEEK }}
157229
---
158230
Download testing outputs (includes both Haiku and DeepSeek results): [Download](${{ steps.upload_results_artifact.outputs.artifact-url }})

.github/workflows/lint.yml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,16 @@ jobs:
5353
run: pip install pre-commit==3.7.0
5454
- name: Run pre-commit hooks
5555
run: pre-commit run --files openhands/**/* evaluation/**/* tests/**/* --show-diff-on-failure --config ./dev_config/python/.pre-commit-config.yaml
56+
57+
# Check version consistency across documentation
58+
check-version-consistency:
59+
name: Check version consistency
60+
runs-on: ubuntu-latest
61+
steps:
62+
- uses: actions/checkout@v4
63+
- name: Set up python
64+
uses: actions/setup-python@v5
65+
with:
66+
python-version: 3.12
67+
- name: Run version consistency check
68+
run: .github/scripts/check_version_consistency.py

0 commit comments

Comments
 (0)