diff --git a/evaluation/EDA/run_infer.py b/evaluation/EDA/run_infer.py index ec29bcb06eb7..5cd66901d6ce 100644 --- a/evaluation/EDA/run_infer.py +++ b/evaluation/EDA/run_infer.py @@ -18,12 +18,14 @@ # from evaluation.EDA.scorer import question_scorer from opendevin.controller.state.state import State -from opendevin.core.config import config, get_llm_config_arg, get_parser +from opendevin.core.config import get_llm_config_arg, get_parser, load_app_config from opendevin.core.logger import get_console_handler from opendevin.core.logger import opendevin_logger as logger from opendevin.core.main import run_agent_controller from opendevin.llm.llm import LLM +config = load_app_config() + game = None diff --git a/evaluation/agent_bench/run_infer.py b/evaluation/agent_bench/run_infer.py index c434998c8921..d6edeb3ff961 100644 --- a/evaluation/agent_bench/run_infer.py +++ b/evaluation/agent_bench/run_infer.py @@ -22,7 +22,7 @@ ) from opendevin.controller.agent import Agent from opendevin.controller.state.state import State -from opendevin.core.config import config, get_llm_config_arg, parse_arguments +from opendevin.core.config import get_llm_config_arg, load_app_config, parse_arguments from opendevin.core.logger import get_console_handler from opendevin.core.logger import opendevin_logger as logger from opendevin.core.main import run_agent_controller @@ -30,6 +30,8 @@ from opendevin.llm.llm import LLM from opendevin.runtime.docker.ssh_box import DockerSSHBox +config = load_app_config() + def process_instance( instance: pd.Series, diff --git a/evaluation/biocoder/biocoder_env_box.py b/evaluation/biocoder/biocoder_env_box.py index cdf1248d0459..f589535ca3a4 100644 --- a/evaluation/biocoder/biocoder_env_box.py +++ b/evaluation/biocoder/biocoder_env_box.py @@ -7,7 +7,7 @@ from datasets import load_dataset -from opendevin.core.config import config +from opendevin.core.config import load_app_config from opendevin.core.logger import opendevin_logger as logger from opendevin.runtime.docker.ssh_box import DockerSSHBox from opendevin.runtime.plugins import ( @@ -16,6 +16,8 @@ SWEAgentCommandsRequirement, ) +config = load_app_config() + BIOCODER_BENCH_CONTAINER_IMAGE = 'public.ecr.aws/i5g0m1f6/eval_biocoder:v1.0' diff --git a/evaluation/biocoder/run_infer.py b/evaluation/biocoder/run_infer.py index f08f06bffc55..3fb14e8bd76c 100644 --- a/evaluation/biocoder/run_infer.py +++ b/evaluation/biocoder/run_infer.py @@ -18,12 +18,14 @@ ) from opendevin.controller.agent import Agent from opendevin.controller.state.state import State -from opendevin.core.config import config, get_llm_config_arg, parse_arguments +from opendevin.core.config import get_llm_config_arg, load_app_config, parse_arguments from opendevin.core.logger import get_console_handler from opendevin.core.logger import opendevin_logger as logger from opendevin.core.main import run_agent_controller from opendevin.llm.llm import LLM +config = load_app_config() + AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = { 'CodeActAgent': partial( codeact_user_response, encapsulate_solution=True, try_parse=None diff --git a/evaluation/bird/run_infer.py b/evaluation/bird/run_infer.py index f086ba70c513..d370f6780ee1 100644 --- a/evaluation/bird/run_infer.py +++ b/evaluation/bird/run_infer.py @@ -21,13 +21,15 @@ ) from opendevin.controller.agent import Agent from opendevin.controller.state.state import State -from opendevin.core.config import config, get_llm_config_arg, parse_arguments +from opendevin.core.config import get_llm_config_arg, load_app_config, parse_arguments from opendevin.core.logger import get_console_handler from opendevin.core.logger import opendevin_logger as logger from opendevin.core.main import run_agent_controller from opendevin.events.action import MessageAction from opendevin.llm.llm import LLM +config = load_app_config() + def codeact_user_response(state: State) -> str: msg = ( diff --git a/evaluation/browsing_delegation/run_infer.py b/evaluation/browsing_delegation/run_infer.py index 43cb3723db18..02e529fe0d4f 100644 --- a/evaluation/browsing_delegation/run_infer.py +++ b/evaluation/browsing_delegation/run_infer.py @@ -15,12 +15,14 @@ ) from opendevin.controller.agent import Agent from opendevin.controller.state.state import State -from opendevin.core.config import config, get_llm_config_arg, parse_arguments +from opendevin.core.config import get_llm_config_arg, load_app_config, parse_arguments from opendevin.core.logger import get_console_handler from opendevin.core.logger import opendevin_logger as logger from opendevin.core.main import run_agent_controller from opendevin.llm.llm import LLM +config = load_app_config() + # Only CodeActAgent can delegate to BrowsingAgent SUPPORTED_AGENT_CLS = {'CodeActAgent'} diff --git a/evaluation/gaia/run_infer.py b/evaluation/gaia/run_infer.py index 775d5b605a8f..cfbfb32eb9be 100644 --- a/evaluation/gaia/run_infer.py +++ b/evaluation/gaia/run_infer.py @@ -20,13 +20,15 @@ ) from opendevin.controller.agent import Agent from opendevin.controller.state.state import State -from opendevin.core.config import config, get_llm_config_arg, get_parser +from opendevin.core.config import get_llm_config_arg, get_parser, load_app_config from opendevin.core.logger import get_console_handler from opendevin.core.logger import opendevin_logger as logger from opendevin.core.main import run_agent_controller from opendevin.events.action import CmdRunAction, MessageAction from opendevin.llm.llm import LLM +config = load_app_config() + DATASET_CACHE_DIR = '~/.cache/open-devin/evals/gaia' DATASET_CACHE_DIR = os.path.expanduser(DATASET_CACHE_DIR) diff --git a/evaluation/gorilla/run_infer.py b/evaluation/gorilla/run_infer.py index 967cf4a9dae2..ea68d445a109 100644 --- a/evaluation/gorilla/run_infer.py +++ b/evaluation/gorilla/run_infer.py @@ -12,7 +12,7 @@ from opendevin.controller.agent import Agent from opendevin.controller.state.state import State -from opendevin.core.config import config, get_llm_config_arg, get_parser +from opendevin.core.config import get_llm_config_arg, get_parser, load_app_config from opendevin.core.logger import get_console_handler from opendevin.core.logger import opendevin_logger as logger from opendevin.core.main import run_agent_controller @@ -21,6 +21,8 @@ from .utils import encode_question, get_data +config = load_app_config() + def cleanup(): print('Cleaning up child processes...') diff --git a/evaluation/gpqa/run_infer.py b/evaluation/gpqa/run_infer.py index 84072fc2b57d..9cc8751f3e33 100644 --- a/evaluation/gpqa/run_infer.py +++ b/evaluation/gpqa/run_infer.py @@ -36,7 +36,7 @@ ) from opendevin.controller.agent import Agent from opendevin.controller.state.state import State -from opendevin.core.config import config, get_llm_config_arg, get_parser +from opendevin.core.config import get_llm_config_arg, get_parser, load_app_config from opendevin.core.logger import get_console_handler from opendevin.core.logger import opendevin_logger as logger from opendevin.core.main import run_agent_controller @@ -44,6 +44,8 @@ from opendevin.events.observation import Observation from opendevin.llm.llm import LLM +config = load_app_config() + ACTION_FORMAT = """ < (Please dont use any additional characters. Just the letter of the correct answer (A/B/C/D).) diff --git a/evaluation/humanevalfix/run_infer.py b/evaluation/humanevalfix/run_infer.py index 2e8322560edb..fa683966339f 100644 --- a/evaluation/humanevalfix/run_infer.py +++ b/evaluation/humanevalfix/run_infer.py @@ -26,12 +26,14 @@ ) from opendevin.controller.agent import Agent from opendevin.controller.state.state import State -from opendevin.core.config import config, get_llm_config_arg, parse_arguments +from opendevin.core.config import get_llm_config_arg, load_app_config, parse_arguments from opendevin.core.logger import get_console_handler from opendevin.core.logger import opendevin_logger as logger from opendevin.core.main import run_agent_controller from opendevin.llm.llm import LLM +config = load_app_config() + IMPORT_HELPER = { 'python': [ 'import math', diff --git a/evaluation/logic_reasoning/run_infer.py b/evaluation/logic_reasoning/run_infer.py index 6071d0d4b3f9..f8a050bfd22e 100644 --- a/evaluation/logic_reasoning/run_infer.py +++ b/evaluation/logic_reasoning/run_infer.py @@ -17,12 +17,14 @@ ) from opendevin.controller.agent import Agent from opendevin.controller.state.state import State -from opendevin.core.config import config, get_llm_config_arg, get_parser +from opendevin.core.config import get_llm_config_arg, get_parser, load_app_config from opendevin.core.logger import get_console_handler from opendevin.core.logger import opendevin_logger as logger from opendevin.core.main import run_agent_controller from opendevin.llm.llm import LLM +config = load_app_config() + AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = { 'CodeActAgent': codeact_user_response, } diff --git a/evaluation/miniwob/run_infer.py b/evaluation/miniwob/run_infer.py index 317a7b0e7918..9218ac56d9bb 100644 --- a/evaluation/miniwob/run_infer.py +++ b/evaluation/miniwob/run_infer.py @@ -15,7 +15,7 @@ ) from opendevin.controller.agent import Agent from opendevin.controller.state.state import State -from opendevin.core.config import config, get_llm_config_arg, parse_arguments +from opendevin.core.config import get_llm_config_arg, load_app_config, parse_arguments from opendevin.core.logger import get_console_handler from opendevin.core.logger import opendevin_logger as logger from opendevin.core.main import run_agent_controller @@ -23,6 +23,8 @@ from opendevin.runtime.docker.ssh_box import DockerSSHBox from opendevin.runtime.tools import RuntimeTool +config = load_app_config() + SUPPORTED_AGENT_CLS = {'BrowsingAgent'} docker_ssh_box: DockerSSHBox | None = None diff --git a/evaluation/mint/run_infer.py b/evaluation/mint/run_infer.py index 39eb476a1412..b1d4c0826880 100644 --- a/evaluation/mint/run_infer.py +++ b/evaluation/mint/run_infer.py @@ -16,7 +16,7 @@ ) from opendevin.controller.agent import Agent from opendevin.controller.state.state import State -from opendevin.core.config import config, get_llm_config_arg, get_parser +from opendevin.core.config import get_llm_config_arg, get_parser, load_app_config from opendevin.core.logger import get_console_handler from opendevin.core.logger import opendevin_logger as logger from opendevin.core.main import run_agent_controller @@ -27,6 +27,8 @@ from .prompts import ToolPromptTemplate from .tasks import Task +config = load_app_config() + def codeact_user_response_mint(state: State, task: Task, task_config: Dict[str, int]): logger.info(f'Gold reference: {task.reference}') diff --git a/evaluation/ml_bench/run_analysis.py b/evaluation/ml_bench/run_analysis.py index 487c405a47cc..14d0acb5ac86 100644 --- a/evaluation/ml_bench/run_analysis.py +++ b/evaluation/ml_bench/run_analysis.py @@ -4,10 +4,12 @@ import tqdm -from opendevin.core.config import config, get_llm_config_arg, get_parser +from opendevin.core.config import get_llm_config_arg, get_parser, load_app_config from opendevin.core.logger import opendevin_logger as logger from opendevin.llm.llm import LLM +config = load_app_config() + def extract_test_results(res_file_path: str) -> tuple[list[str], list[str]]: passed = [] diff --git a/evaluation/ml_bench/run_infer.py b/evaluation/ml_bench/run_infer.py index 6c7158e10ce4..9be043933177 100644 --- a/evaluation/ml_bench/run_infer.py +++ b/evaluation/ml_bench/run_infer.py @@ -30,13 +30,15 @@ ) from opendevin.controller.agent import Agent from opendevin.controller.state.state import State -from opendevin.core.config import config, get_llm_config_arg, get_parser +from opendevin.core.config import get_llm_config_arg, get_parser, load_app_config from opendevin.core.logger import get_console_handler from opendevin.core.logger import opendevin_logger as logger from opendevin.core.main import run_agent_controller from opendevin.llm.llm import LLM from opendevin.runtime.docker.ssh_box import DockerSSHBox +config = load_app_config() + AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = { 'CodeActAgent': codeact_user_response, } diff --git a/evaluation/regression/run_tests.py b/evaluation/regression/run_tests.py index 705739b1c9f9..ed2429b0299e 100644 --- a/evaluation/regression/run_tests.py +++ b/evaluation/regression/run_tests.py @@ -2,7 +2,9 @@ import pytest -from opendevin.config import config +from opendevin.config import load_app_config + +config = load_app_config() if __name__ == '__main__': """Main entry point of the script. diff --git a/evaluation/swe_bench/run_infer.py b/evaluation/swe_bench/run_infer.py index 5b6d937ec1e9..0578eb588a3b 100644 --- a/evaluation/swe_bench/run_infer.py +++ b/evaluation/swe_bench/run_infer.py @@ -19,12 +19,14 @@ ) from opendevin.controller.agent import Agent from opendevin.controller.state.state import State -from opendevin.core.config import config, get_llm_config_arg, parse_arguments +from opendevin.core.config import get_llm_config_arg, load_app_config, parse_arguments from opendevin.core.logger import get_console_handler from opendevin.core.logger import opendevin_logger as logger from opendevin.core.main import run_agent_controller from opendevin.llm.llm import LLM +config = load_app_config() + USE_HINT_TEXT = os.environ.get('USE_HINT_TEXT', 'false') == 'true' USE_INSTANCE_IMAGE = os.environ.get('USE_INSTANCE_IMAGE', 'false') == 'true' diff --git a/evaluation/swe_bench/swe_env_box.py b/evaluation/swe_bench/swe_env_box.py index 062b3ad83399..47427c683615 100644 --- a/evaluation/swe_bench/swe_env_box.py +++ b/evaluation/swe_bench/swe_env_box.py @@ -8,7 +8,7 @@ from swebench.harness.constants import MAP_REPO_TO_TEST_FRAMEWORK from swebench.harness.utils import get_test_directives -from opendevin.core.config import SandboxConfig, config +from opendevin.core.config import AppConfig, SandboxConfig, load_app_config from opendevin.core.logger import opendevin_logger as logger from opendevin.runtime.docker.ssh_box import DockerSSHBox from opendevin.runtime.plugins import ( @@ -27,6 +27,7 @@ def get_image_name_from_instance_id(instance_id: str) -> str: class SWEBenchSSHBox(DockerSSHBox): def __init__( self, + config: AppConfig, container_image: str, timeout: int = 120, sid: str | None = None, @@ -146,6 +147,7 @@ def volumes(self): def get_box_for_instance( cls, instance, + config: AppConfig, workspace_dir_name=None, skip_workspace_mount: bool = True, workspace_mount_path: str | None = None, @@ -164,7 +166,7 @@ def get_box_for_instance( config.workspace_mount_path = workspace_mount_path # linting python after editing helps LLM fix indentations - config.enable_auto_lint = True + config.sandbox.enable_auto_lint = True # Need to run as root to use SWEBench container config.run_as_devin = False if use_instance_image: @@ -175,6 +177,7 @@ def get_box_for_instance( container_image = SWE_BENCH_CONTAINER_IMAGE sandbox = cls( container_image=container_image, + config=config, swe_instance_id=instance['instance_id'], swe_instance=instance, skip_workspace_mount=skip_workspace_mount, @@ -239,6 +242,8 @@ def get_diff_patch(self): if __name__ == '__main__': + config = load_app_config() + # NOTE: It is preferable to load datasets from huggingface datasets and perform post-processing # so we don't need to manage file uploading to OpenDevin's repo dataset = load_dataset('princeton-nlp/SWE-bench_Lite') @@ -252,6 +257,7 @@ def get_diff_patch(self): EXAMPLE_INSTANCE = swe_bench_tests.iloc[0].to_dict() sandbox = SWEBenchSSHBox.get_box_for_instance( + config=config, instance=EXAMPLE_INSTANCE, sandbox_plugins=[AgentSkillsRequirement(), JupyterRequirement()], use_instance_image=USE_INSTANCE_IMAGE, diff --git a/evaluation/toolqa/run_infer.py b/evaluation/toolqa/run_infer.py index 152c35b023e2..7653d35568ff 100644 --- a/evaluation/toolqa/run_infer.py +++ b/evaluation/toolqa/run_infer.py @@ -15,7 +15,7 @@ ) from opendevin.controller.agent import Agent from opendevin.controller.state.state import State -from opendevin.core.config import config, get_llm_config_arg, get_parser +from opendevin.core.config import get_llm_config_arg, get_parser, load_app_config from opendevin.core.logger import get_console_handler from opendevin.core.logger import opendevin_logger as logger from opendevin.core.main import run_agent_controller @@ -23,6 +23,8 @@ from .utils import download_data, download_tools, encode_question, eval_answer, get_data +config = load_app_config() + AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = { 'CodeActAgent': codeact_user_response, } diff --git a/evaluation/webarena/run_infer.py b/evaluation/webarena/run_infer.py index dd532ccf3e1f..4e6d181566cf 100644 --- a/evaluation/webarena/run_infer.py +++ b/evaluation/webarena/run_infer.py @@ -15,7 +15,7 @@ ) from opendevin.controller.agent import Agent from opendevin.controller.state.state import State -from opendevin.core.config import config, get_llm_config_arg, parse_arguments +from opendevin.core.config import get_llm_config_arg, load_app_config, parse_arguments from opendevin.core.logger import get_console_handler from opendevin.core.logger import opendevin_logger as logger from opendevin.core.main import run_agent_controller @@ -23,6 +23,8 @@ from opendevin.runtime.docker.ssh_box import DockerSSHBox from opendevin.runtime.tools import RuntimeTool +config = load_app_config() + SUPPORTED_AGENT_CLS = {'BrowsingAgent'} diff --git a/opendevin/controller/state/state.py b/opendevin/controller/state/state.py index 2c25f916f3c4..a8dd2ed33372 100644 --- a/opendevin/controller/state/state.py +++ b/opendevin/controller/state/state.py @@ -12,7 +12,7 @@ ) from opendevin.events.action.agent import AgentFinishAction from opendevin.memory.history import ShortTermHistory -from opendevin.storage import get_file_store +from opendevin.storage.files import FileStore class TrafficControlState(str, Enum): @@ -107,22 +107,20 @@ class State: end_id: int = -1 almost_stuck: int = 0 - def save_to_session(self, sid: str): - fs = get_file_store() + def save_to_session(self, sid: str, file_store: FileStore): pickled = pickle.dumps(self) logger.debug(f'Saving state to session {sid}:{self.agent_state}') encoded = base64.b64encode(pickled).decode('utf-8') try: - fs.write(f'sessions/{sid}/agent_state.pkl', encoded) + file_store.write(f'sessions/{sid}/agent_state.pkl', encoded) except Exception as e: logger.error(f'Failed to save state to session: {e}') raise e @staticmethod - def restore_from_session(sid: str) -> 'State': - fs = get_file_store() + def restore_from_session(sid: str, file_store: FileStore) -> 'State': try: - encoded = fs.read(f'sessions/{sid}/agent_state.pkl') + encoded = file_store.read(f'sessions/{sid}/agent_state.pkl') pickled = base64.b64decode(encoded) state = pickle.loads(pickled) except Exception as e: diff --git a/opendevin/core/config.py b/opendevin/core/config.py index 1360d9988475..4fff99216a99 100644 --- a/opendevin/core/config.py +++ b/opendevin/core/config.py @@ -18,6 +18,8 @@ LLM_SENSITIVE_FIELDS = ['api_key', 'aws_access_key_id', 'aws_secret_access_key'] +_DEFAULT_AGENT = 'CodeActAgent' +_MAX_ITERATIONS = 100 @dataclass @@ -220,7 +222,7 @@ class AppConfig(metaclass=Singleton): llms: dict[str, LLMConfig] = field(default_factory=dict) agents: dict = field(default_factory=dict) - default_agent: str = 'CodeActAgent' + default_agent: str = _DEFAULT_AGENT sandbox: SandboxConfig = field(default_factory=SandboxConfig) runtime: str = 'server' file_store: str = 'memory' @@ -234,7 +236,7 @@ class AppConfig(metaclass=Singleton): cache_dir: str = '/tmp/cache' run_as_devin: bool = True confirmation_mode: bool = False - max_iterations: int = 100 + max_iterations: int = _MAX_ITERATIONS max_budget_per_task: float | None = None e2b_api_key: str = '' ssh_hostname: str = 'localhost' @@ -631,21 +633,20 @@ def get_parser() -> argparse.ArgumentParser: parser.add_argument( '-c', '--agent-cls', - default=config.default_agent, + default=_DEFAULT_AGENT, type=str, help='Name of the default agent to use', ) parser.add_argument( '-i', '--max-iterations', - default=config.max_iterations, + default=_MAX_ITERATIONS, type=int, help='The maximum number of iterations to run the agent', ) parser.add_argument( '-b', '--max-budget-per-task', - default=config.max_budget_per_task, type=float, help='The maximum budget allowed per task, beyond which the agent will stop.', ) @@ -688,9 +689,6 @@ def parse_arguments() -> argparse.Namespace: """Parse the command line arguments.""" parser = get_parser() parsed_args, _ = parser.parse_known_args() - if parsed_args.directory: - config.workspace_base = os.path.abspath(parsed_args.directory) - print(f'Setting workspace base to {config.workspace_base}') return parsed_args @@ -708,6 +706,3 @@ def load_app_config(set_logging_levels: bool = True) -> AppConfig: logger.DEBUG = config.debug logger.DISABLE_COLOR_PRINTING = config.disable_color return config - - -config = load_app_config() diff --git a/opendevin/core/main.py b/opendevin/core/main.py index f449418d493e..34f58082979c 100644 --- a/opendevin/core/main.py +++ b/opendevin/core/main.py @@ -7,7 +7,7 @@ from opendevin.controller import AgentController from opendevin.controller.agent import Agent from opendevin.controller.state.state import State -from opendevin.core.config import config, get_llm_config_arg, parse_arguments +from opendevin.core.config import get_llm_config_arg, load_app_config, parse_arguments from opendevin.core.logger import opendevin_logger as logger from opendevin.core.schema import AgentState from opendevin.events import EventSource, EventStream, EventStreamSubscriber @@ -17,6 +17,9 @@ from opendevin.llm.llm import LLM from opendevin.runtime import get_runtime_cls from opendevin.runtime.sandbox import Sandbox +from opendevin.storage import get_file_store + +config = load_app_config() def read_task_from_file(file_path: str) -> str: @@ -58,15 +61,16 @@ async def run_agent_controller( ) # set up the event stream + file_store = get_file_store(config.file_store, config.file_store_path) cli_session = 'main' + ('_' + sid if sid else '') - event_stream = EventStream(cli_session) + event_stream = EventStream(cli_session, file_store) # restore cli session if enabled initial_state = None if config.enable_cli_session: try: logger.info('Restoring agent state from cli session') - initial_state = State.restore_from_session(cli_session) + initial_state = State.restore_from_session(cli_session, file_store) except Exception as e: print('Error restoring state', e) @@ -83,9 +87,7 @@ async def run_agent_controller( # runtime and tools runtime_cls = get_runtime_cls(config.runtime) - runtime = runtime_cls( - sandbox_config=config.sandbox, event_stream=event_stream, sandbox=sandbox - ) + runtime = runtime_cls(config=config, event_stream=event_stream, sandbox=sandbox) await runtime.ainit() runtime.init_sandbox_plugins(controller.agent.sandbox_plugins) runtime.init_runtime_tools( @@ -142,7 +144,7 @@ async def on_event(event: Event): # save session when we're about to close if config.enable_cli_session: end_state = controller.get_state() - end_state.save_to_session(cli_session) + end_state.save_to_session(cli_session, file_store) # close when done await controller.close() diff --git a/opendevin/events/stream.py b/opendevin/events/stream.py index 142d65c5d647..5a6f90998b98 100644 --- a/opendevin/events/stream.py +++ b/opendevin/events/stream.py @@ -7,7 +7,7 @@ from opendevin.core.logger import opendevin_logger as logger from opendevin.core.utils import json from opendevin.events.serialization.event import event_from_dict, event_to_dict -from opendevin.storage import FileStore, get_file_store +from opendevin.storage import FileStore from .event import Event, EventSource @@ -29,15 +29,15 @@ class EventStream: _lock: threading.Lock _file_store: FileStore - def __init__(self, sid: str): + def __init__(self, sid: str, file_store: FileStore): self.sid = sid - self._file_store = get_file_store() + self._file_store = file_store self._subscribers = {} self._cur_id = 0 self._lock = threading.Lock() self._reinitialize_from_file_store() - def _reinitialize_from_file_store(self): + def _reinitialize_from_file_store(self) -> None: try: events = self._file_store.list(f'sessions/{self.sid}/events') except FileNotFoundError: diff --git a/opendevin/memory/memory.py b/opendevin/memory/memory.py index 18d0903dbf03..a175c36d3f95 100644 --- a/opendevin/memory/memory.py +++ b/opendevin/memory/memory.py @@ -13,14 +13,14 @@ wait_random_exponential, ) -from opendevin.core.config import LLMConfig, config +from opendevin.core.config import LLMConfig from opendevin.core.logger import opendevin_logger as logger from opendevin.core.utils import json -# TODO: this should depend on specific agent setting -num_retries = config.get_llm_config().num_retries -retry_min_wait = config.get_llm_config().retry_min_wait -retry_max_wait = config.get_llm_config().retry_max_wait +# TODO: this could be made configurable +num_retries: int = 10 +retry_min_wait: int = 3 +retry_max_wait: int = 300 # llama-index includes a retry decorator around openai.get_embeddings() function # it is initialized with hard-coded values and errors @@ -110,21 +110,19 @@ def get_embedding_model(strategy: str, llm_config: LLMConfig): class LongTermMemory: """Handles storing information for the agent to access later, using chromadb.""" - def __init__(self, agent_config_name='agent'): + def __init__(self, llm_config: LLMConfig, memory_max_threads: int = 1): """Initialize the chromadb and set up ChromaVectorStore for later use.""" db = chromadb.Client(chromadb.Settings(anonymized_telemetry=False)) self.collection = db.get_or_create_collection(name='memories') vector_store = ChromaVectorStore(chroma_collection=self.collection) - agent_config = config.get_agent_config(agent_config_name) - llm_config = config.get_llm_config(agent_config.llm_config) embedding_strategy = llm_config.embedding_model embed_model = EmbeddingsLoader.get_embedding_model( embedding_strategy, llm_config ) self.index = VectorStoreIndex.from_vector_store(vector_store, embed_model) - self.sema = threading.Semaphore(value=agent_config.memory_max_threads) + self.sema = threading.Semaphore(value=memory_max_threads) self.thought_idx = 0 - self._add_threads = [] + self._add_threads: list[threading.Thread] = [] def add_event(self, event: dict): """Adds a new event to the long term memory with a unique id. diff --git a/opendevin/runtime/client/runtime.py b/opendevin/runtime/client/runtime.py index 57689d978d20..100d18df9036 100644 --- a/opendevin/runtime/client/runtime.py +++ b/opendevin/runtime/client/runtime.py @@ -6,7 +6,7 @@ import docker import tenacity -from opendevin.core.config import SandboxConfig, config +from opendevin.core.config import AppConfig from opendevin.core.logger import opendevin_logger as logger from opendevin.events import EventSource, EventStream from opendevin.events.action import ( @@ -26,11 +26,7 @@ ) from opendevin.events.serialization import event_to_dict, observation_from_dict from opendevin.events.serialization.action import ACTION_TYPE_TO_CLASS -from opendevin.runtime.plugins import ( - AgentSkillsRequirement, - JupyterRequirement, - PluginRequirement, -) +from opendevin.runtime.plugins import PluginRequirement from opendevin.runtime.runtime import Runtime from opendevin.runtime.utils import find_available_tcp_port from opendevin.runtime.utils.runtime_build import build_runtime_image @@ -45,15 +41,13 @@ class EventStreamRuntime(Runtime): def __init__( self, - sandbox_config: SandboxConfig, + config: AppConfig, event_stream: EventStream, sid: str = 'default', container_image: str | None = None, plugins: list[PluginRequirement] | None = None, ): - super().__init__( - sandbox_config, event_stream, sid - ) # will initialize the event stream + super().__init__(config, event_stream, sid) # will initialize the event stream self._port = find_available_tcp_port() self.api_url = f'http://localhost:{self._port}' self.session: Optional[aiohttp.ClientSession] = None @@ -81,11 +75,11 @@ async def ainit(self, env_vars: dict[str, str] | None = None): # NOTE: You can need set DEBUG=true to update the source code # inside the container. This is useful when you want to test/debug the # latest code in the runtime docker container. - update_source_code=self.sandbox_config.update_source_code, + update_source_code=self.config.sandbox.update_source_code, ) self.container = await self._init_container( self.sandbox_workspace_dir, - mount_dir=config.workspace_mount_path, + mount_dir=self.config.workspace_mount_path, plugins=self.plugins, ) # MUST call super().ainit() to initialize both default env vars @@ -109,7 +103,7 @@ def _init_docker_client() -> docker.DockerClient: async def _init_container( self, sandbox_workspace_dir: str, - mount_dir: str = config.workspace_mount_path, + mount_dir: str, plugins: list[PluginRequirement] | None = None, ): try: @@ -122,7 +116,7 @@ async def _init_container( network_mode: str | None = None port_mapping: dict[str, int] | None = None - if self.sandbox_config.use_host_network: + if self.config.sandbox.use_host_network: network_mode = 'host' logger.warn( 'Using host network mode. If you are using MacOS, please make sure you have the latest version of Docker Desktop and enabled host network feature: https://docs.docker.com/network/drivers/host/#docker-desktop' @@ -144,7 +138,7 @@ async def _init_container( working_dir='/opendevin/code/', name=self.container_name, detach=True, - environment={'DEBUG': 'true'} if config.debug else None, + environment={'DEBUG': 'true'} if self.config.debug else None, volumes={mount_dir: {'bind': sandbox_workspace_dir, 'mode': 'rw'}}, ) logger.info(f'Container started. Server url: {self.api_url}') @@ -179,7 +173,7 @@ async def _wait_until_alive(self): @property def sandbox_workspace_dir(self): - return config.workspace_mount_path_in_sandbox + return self.config.workspace_mount_path_in_sandbox async def close(self, close_client: bool = True): if self.session is not None and not self.session.closed: @@ -286,75 +280,3 @@ def get_working_directory(self): # Overwrite the init_sandbox_plugins def init_sandbox_plugins(self, plugins: list[PluginRequirement]) -> None: pass - - -async def test_run_command(): - sid = 'test' - cli_session = 'main' + ('_' + sid if sid else '') - event_stream = EventStream(cli_session) - runtime = EventStreamRuntime( - sandbox_config=config.sandbox, event_stream=event_stream, sid=sid - ) - await runtime.ainit() - await runtime.run_action(CmdRunAction('ls -l')) - - -async def test_event_stream(): - sid = 'test' - cli_session = 'main' + ('_' + sid if sid else '') - event_stream = EventStream(cli_session) - runtime = EventStreamRuntime( - sandbox_config=config.sandbox, - event_stream=event_stream, - sid=sid, - container_image='ubuntu:22.04', - plugins=[JupyterRequirement(), AgentSkillsRequirement()], - ) - await runtime.ainit() - - # Test run command - action_cmd = CmdRunAction(command='ls -l') - logger.info(action_cmd, extra={'msg_type': 'ACTION'}) - logger.info(await runtime.run_action(action_cmd), extra={'msg_type': 'OBSERVATION'}) - - # Test run ipython - test_code = "print('Hello, `World`!\\n')" - action_ipython = IPythonRunCellAction(code=test_code) - logger.info(action_ipython, extra={'msg_type': 'ACTION'}) - logger.info( - await runtime.run_action(action_ipython), extra={'msg_type': 'OBSERVATION'} - ) - - # Test read file (file should not exist) - action_read = FileReadAction(path='hello.sh') - logger.info(action_read, extra={'msg_type': 'ACTION'}) - logger.info( - await runtime.run_action(action_read), extra={'msg_type': 'OBSERVATION'} - ) - - # Test write file - action_write = FileWriteAction(content='echo "Hello, World!"', path='hello.sh') - logger.info(action_write, extra={'msg_type': 'ACTION'}) - logger.info( - await runtime.run_action(action_write), extra={'msg_type': 'OBSERVATION'} - ) - - # Test read file (file should exist) - action_read = FileReadAction(path='hello.sh') - logger.info(action_read, extra={'msg_type': 'ACTION'}) - logger.info( - await runtime.run_action(action_read), extra={'msg_type': 'OBSERVATION'} - ) - - # Test browse - action_browse = BrowseURLAction(url='https://google.com') - logger.info(action_browse, extra={'msg_type': 'ACTION'}) - logger.info( - await runtime.run_action(action_browse), extra={'msg_type': 'OBSERVATION'} - ) - - await runtime.close() - - -if __name__ == '__main__': - asyncio.run(test_event_stream()) diff --git a/opendevin/runtime/e2b/runtime.py b/opendevin/runtime/e2b/runtime.py index e21df3cb127a..36d162b3d8d8 100644 --- a/opendevin/runtime/e2b/runtime.py +++ b/opendevin/runtime/e2b/runtime.py @@ -1,4 +1,4 @@ -from opendevin.core.config import SandboxConfig +from opendevin.core.config import AppConfig from opendevin.events.action import ( FileReadAction, FileWriteAction, @@ -21,12 +21,12 @@ class E2BRuntime(ServerRuntime): def __init__( self, - sandbox_config: SandboxConfig, + config: AppConfig, event_stream: EventStream, sid: str = 'default', sandbox: Sandbox | None = None, ): - super().__init__(sandbox_config, event_stream, sid, sandbox) + super().__init__(config, event_stream, sid, sandbox) if not isinstance(self.sandbox, E2BSandbox): raise ValueError('E2BRuntime requires an E2BSandbox') self.file_store = E2BFileStore(self.sandbox.filesystem) diff --git a/opendevin/runtime/runtime.py b/opendevin/runtime/runtime.py index 219d7419b319..0be6d97d0af8 100644 --- a/opendevin/runtime/runtime.py +++ b/opendevin/runtime/runtime.py @@ -6,7 +6,7 @@ from abc import abstractmethod from typing import Any, Optional -from opendevin.core.config import SandboxConfig +from opendevin.core.config import AppConfig, SandboxConfig from opendevin.core.logger import opendevin_logger as logger from opendevin.events import EventStream, EventStreamSubscriber from opendevin.events.action import ( @@ -57,15 +57,15 @@ class Runtime: def __init__( self, - sandbox_config: SandboxConfig, + config: AppConfig, event_stream: EventStream, sid: str = 'default', ): self.sid = sid self.event_stream = event_stream self.event_stream.subscribe(EventStreamSubscriber.RUNTIME, self.on_event) - self.sandbox_config = copy.deepcopy(sandbox_config) - self.DEFAULT_ENV_VARS = _default_env_vars(self.sandbox_config) + self.config = copy.deepcopy(config) + self.DEFAULT_ENV_VARS = _default_env_vars(config.sandbox) atexit.register(self.close_sync) async def ainit(self, env_vars: dict[str, str] | None = None) -> None: diff --git a/opendevin/runtime/server/runtime.py b/opendevin/runtime/server/runtime.py index 0d51d4c253d1..04b6ee57fe05 100644 --- a/opendevin/runtime/server/runtime.py +++ b/opendevin/runtime/server/runtime.py @@ -1,6 +1,6 @@ from typing import Any, Optional -from opendevin.core.config import SandboxConfig, config +from opendevin.core.config import AppConfig from opendevin.core.exceptions import BrowserInitException from opendevin.core.logger import opendevin_logger as logger from opendevin.events.action import ( @@ -34,49 +34,50 @@ from .files import read_file, write_file -def create_sandbox(sid: str = 'default', box_type: str = 'ssh') -> Sandbox: - if box_type == 'local': - return LocalBox(config=config.sandbox, workspace_base=config.workspace_base) - elif box_type == 'ssh': - return DockerSSHBox( - config=config.sandbox, - persist_sandbox=config.persist_sandbox, - workspace_mount_path=config.workspace_mount_path, - sandbox_workspace_dir=config.workspace_mount_path_in_sandbox, - cache_dir=config.cache_dir, - run_as_devin=config.run_as_devin, - ssh_hostname=config.ssh_hostname, - ssh_password=config.ssh_password, - ssh_port=config.ssh_port, - sid=sid, - ) - elif box_type == 'e2b': - return E2BBox( - config=config.sandbox, - e2b_api_key=config.e2b_api_key, - ) - else: - raise ValueError(f'Invalid sandbox type: {box_type}') - - class ServerRuntime(Runtime): def __init__( self, - sandbox_config: SandboxConfig, + config: AppConfig, event_stream: EventStream, sid: str = 'default', sandbox: Sandbox | None = None, ): - super().__init__(sandbox_config, event_stream, sid) + super().__init__(config, event_stream, sid) self.file_store = LocalFileStore(config.workspace_base) if sandbox is None: - self.sandbox = create_sandbox(sid, config.sandbox.box_type) + self.sandbox = self.create_sandbox(sid, config.sandbox.box_type) self._is_external_sandbox = False else: self.sandbox = sandbox self._is_external_sandbox = True self.browser: BrowserEnv | None = None + def create_sandbox(self, sid: str = 'default', box_type: str = 'ssh') -> Sandbox: + if box_type == 'local': + return LocalBox( + config=self.config.sandbox, workspace_base=self.config.workspace_base + ) + elif box_type == 'ssh': + return DockerSSHBox( + config=self.config.sandbox, + persist_sandbox=self.config.persist_sandbox, + workspace_mount_path=self.config.workspace_mount_path, + sandbox_workspace_dir=self.config.workspace_mount_path_in_sandbox, + cache_dir=self.config.cache_dir, + run_as_devin=self.config.run_as_devin, + ssh_hostname=self.config.ssh_hostname, + ssh_password=self.config.ssh_password, + ssh_port=self.config.ssh_port, + sid=sid, + ) + elif box_type == 'e2b': + return E2BBox( + config=self.config.sandbox, + e2b_api_key=self.config.e2b_api_key, + ) + else: + raise ValueError(f'Invalid sandbox type: {box_type}') + async def ainit(self, env_vars: dict[str, str] | None = None): # MUST call super().ainit() to initialize both default env vars # AND the ones in env vars! @@ -177,8 +178,8 @@ async def read(self, action: FileReadAction) -> Observation: return await read_file( action.path, working_dir, - config.workspace_base, - config.workspace_mount_path_in_sandbox, + self.config.workspace_base, + self.config.workspace_mount_path_in_sandbox, action.start, action.end, ) @@ -189,8 +190,8 @@ async def write(self, action: FileWriteAction) -> Observation: return await write_file( action.path, working_dir, - config.workspace_base, - config.workspace_mount_path_in_sandbox, + self.config.workspace_base, + self.config.workspace_mount_path_in_sandbox, action.content, action.start, action.end, diff --git a/opendevin/server/listen.py b/opendevin/server/listen.py index d8fe2f08f922..3bbba95cd1f9 100644 --- a/opendevin/server/listen.py +++ b/opendevin/server/listen.py @@ -8,6 +8,7 @@ from pathspec.patterns import GitWildMatchPattern from opendevin.server.data_models.feedback import FeedbackDataModel, store_feedback +from opendevin.storage import get_file_store with warnings.catch_warnings(): warnings.simplefilter('ignore') @@ -29,7 +30,7 @@ import agenthub # noqa F401 (we import this to get the agents registered) from opendevin.controller.agent import Agent -from opendevin.core.config import LLMConfig, config +from opendevin.core.config import LLMConfig, load_app_config from opendevin.core.logger import opendevin_logger as logger from opendevin.core.schema import AgentState # Add this import from opendevin.events.action import ChangeAgentStateAction, NullAction @@ -42,7 +43,9 @@ from opendevin.server.auth import get_sid_from_token, sign_token from opendevin.server.session import SessionManager -session_manager = SessionManager(config) +config = load_app_config() +file_store = get_file_store(config.file_store, config.file_store_path) +session_manager = SessionManager(config, file_store) app = FastAPI() app.add_middleware( diff --git a/opendevin/server/session/agent.py b/opendevin/server/session/agent.py index 85a2ced053bc..616a99501bff 100644 --- a/opendevin/server/session/agent.py +++ b/opendevin/server/session/agent.py @@ -4,12 +4,13 @@ from opendevin.controller import AgentController from opendevin.controller.agent import Agent from opendevin.controller.state.state import State -from opendevin.core.config import LLMConfig, SandboxConfig +from opendevin.core.config import AppConfig, LLMConfig from opendevin.core.logger import opendevin_logger as logger from opendevin.events.stream import EventStream from opendevin.runtime import DockerSSHBox, get_runtime_cls from opendevin.runtime.runtime import Runtime from opendevin.runtime.server.runtime import ServerRuntime +from opendevin.storage.files import FileStore class AgentSession: @@ -25,15 +26,16 @@ class AgentSession: runtime: Optional[Runtime] = None _closed: bool = False - def __init__(self, sid): + def __init__(self, sid: str, file_store: FileStore): """Initializes a new instance of the Session class.""" self.sid = sid - self.event_stream = EventStream(sid) + self.event_stream = EventStream(sid, file_store) + self.file_store = file_store async def start( self, runtime_name: str, - sandbox_config: SandboxConfig, + config: AppConfig, agent: Agent, confirmation_mode: bool, max_iterations: int, @@ -49,7 +51,7 @@ async def start( raise Exception( 'Session already started. You need to close this session and start a new one.' ) - await self._create_runtime(runtime_name, sandbox_config) + await self._create_runtime(runtime_name, config) await self._create_controller( agent, confirmation_mode, @@ -63,13 +65,13 @@ async def close(self): return if self.controller is not None: end_state = self.controller.get_state() - end_state.save_to_session(self.sid) + end_state.save_to_session(self.sid, self.file_store) await self.controller.close() if self.runtime is not None: await self.runtime.close() self._closed = True - async def _create_runtime(self, runtime_name: str, sandbox_config: SandboxConfig): + async def _create_runtime(self, runtime_name: str, config: AppConfig): """Creates a runtime instance.""" if self.runtime is not None: raise Exception('Runtime already created') @@ -77,7 +79,7 @@ async def _create_runtime(self, runtime_name: str, sandbox_config: SandboxConfig logger.info(f'Using runtime: {runtime_name}') runtime_cls = get_runtime_cls(runtime_name) self.runtime = runtime_cls( - sandbox_config=sandbox_config, event_stream=self.event_stream, sid=self.sid + config=config, event_stream=self.event_stream, sid=self.sid ) await self.runtime.ainit() @@ -121,7 +123,7 @@ async def _create_controller( headless_mode=False, ) try: - agent_state = State.restore_from_session(self.sid) + agent_state = State.restore_from_session(self.sid, self.file_store) self.controller.set_initial_state( agent_state, max_iterations, confirmation_mode ) diff --git a/opendevin/server/session/manager.py b/opendevin/server/session/manager.py index 9d293c268c3e..a481afd2568d 100644 --- a/opendevin/server/session/manager.py +++ b/opendevin/server/session/manager.py @@ -6,6 +6,7 @@ from opendevin.core.config import AppConfig from opendevin.core.logger import opendevin_logger as logger +from opendevin.storage.files import FileStore from .session import Session @@ -15,14 +16,17 @@ class SessionManager: cleanup_interval: int = 300 session_timeout: int = 600 - def __init__(self, config: AppConfig): + def __init__(self, config: AppConfig, file_store: FileStore): asyncio.create_task(self._cleanup_sessions()) self.config = config + self.file_store = file_store def add_or_restart_session(self, sid: str, ws_conn: WebSocket) -> Session: if sid in self._sessions: asyncio.create_task(self._sessions[sid].close()) - self._sessions[sid] = Session(sid=sid, ws=ws_conn, config=self.config) + self._sessions[sid] = Session( + sid=sid, file_store=self.file_store, ws=ws_conn, config=self.config + ) return self._sessions[sid] def get_session(self, sid: str) -> Session | None: diff --git a/opendevin/server/session/session.py b/opendevin/server/session/session.py index 38e5a04c401f..d8434bffc7c8 100644 --- a/opendevin/server/session/session.py +++ b/opendevin/server/session/session.py @@ -20,6 +20,7 @@ from opendevin.events.serialization import event_from_dict, event_to_dict from opendevin.events.stream import EventStreamSubscriber from opendevin.llm.llm import LLM +from opendevin.storage.files import FileStore from .agent import AgentSession @@ -33,11 +34,13 @@ class Session: is_alive: bool = True agent_session: AgentSession - def __init__(self, sid: str, ws: WebSocket | None, config: AppConfig): + def __init__( + self, sid: str, ws: WebSocket | None, config: AppConfig, file_store: FileStore + ): self.sid = sid self.websocket = ws self.last_active_ts = int(time.time()) - self.agent_session = AgentSession(sid) + self.agent_session = AgentSession(sid, file_store) self.agent_session.event_stream.subscribe( EventStreamSubscriber.SERVER, self.on_event ) @@ -102,7 +105,7 @@ async def _initialize_agent(self, data: dict): try: await self.agent_session.start( runtime_name=self.config.runtime, - sandbox_config=self.config.sandbox, + config=self.config, agent=agent, confirmation_mode=confirmation_mode, max_iterations=max_iterations, diff --git a/opendevin/storage/__init__.py b/opendevin/storage/__init__.py index 6ac583fd48c1..c863d3ddc254 100644 --- a/opendevin/storage/__init__.py +++ b/opendevin/storage/__init__.py @@ -1,21 +1,14 @@ -from opendevin.core.config import config - from .files import FileStore from .local import LocalFileStore from .memory import InMemoryFileStore from .s3 import S3FileStore -def _get_file_store() -> FileStore: - if config.file_store == 'local': - return LocalFileStore(config.file_store_path) - elif config.file_store == 's3': +def get_file_store(file_store: str, file_store_path: str | None = None) -> FileStore: + if file_store == 'local': + if file_store_path is None: + raise ValueError('file_store_path is required for local file store') + return LocalFileStore(file_store_path) + elif file_store == 's3': return S3FileStore() return InMemoryFileStore() - - -singleton = _get_file_store() - - -def get_file_store() -> FileStore: - return singleton diff --git a/tests/integration/test_agent.py b/tests/integration/test_agent.py index 52f7f4154f13..cfc2ad1cfdc5 100644 --- a/tests/integration/test_agent.py +++ b/tests/integration/test_agent.py @@ -7,7 +7,7 @@ from opendevin.controller.agent import Agent from opendevin.controller.state.state import State -from opendevin.core.config import LLMConfig, parse_arguments +from opendevin.core.config import LLMConfig from opendevin.core.main import run_agent_controller from opendevin.core.schema import AgentState from opendevin.events.action import ( @@ -74,10 +74,9 @@ def validate_final_state(final_state: State | None, test_name: str): ) def test_write_simple_script(current_test_name: str) -> None: task = "Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point." - args = parse_arguments() # Create the agent - agent = Agent.get_cls(args.agent_cls)(llm=LLM(LLMConfig())) + agent = Agent.get_cls(os.getenv('DEFAULT_AGENT'))(llm=LLM(LLMConfig())) final_state: State | None = asyncio.run( run_agent_controller( @@ -121,7 +120,6 @@ def test_write_simple_script(current_test_name: str) -> None: reason='local sandbox shows environment-dependent absolute path for pwd command', ) def test_edits(current_test_name: str): - args = parse_arguments() # Copy workspace artifacts to workspace_base location source_dir = os.path.join(os.path.dirname(__file__), 'workspace/test_edits/') files = os.listdir(source_dir) @@ -132,7 +130,7 @@ def test_edits(current_test_name: str): shutil.copy(os.path.join(source_dir, file), dest_file) # Create the agent - agent = Agent.get_cls(args.agent_cls)(llm=LLM(LLMConfig())) + agent = Agent.get_cls(os.getenv('DEFAULT_AGENT'))(llm=LLM(LLMConfig())) # Execute the task task = 'Fix typos in bad.txt. Do not ask me for confirmation at any point.' @@ -164,10 +162,8 @@ def test_edits(current_test_name: str): reason='Currently, only ssh sandbox supports stateful tasks', ) def test_ipython(current_test_name: str): - args = parse_arguments() - # Create the agent - agent = Agent.get_cls(args.agent_cls)(llm=LLM(LLMConfig())) + agent = Agent.get_cls(os.getenv('DEFAULT_AGENT'))(llm=LLM(LLMConfig())) # Execute the task task = "Use Jupyter IPython to write a text file containing 'hello world' to '/workspace/test.txt'. Do not ask me for confirmation at any point." @@ -199,10 +195,8 @@ def test_ipython(current_test_name: str): reason='FIXME: local sandbox does not capture stderr', ) def test_simple_task_rejection(current_test_name: str): - args = parse_arguments() - # Create the agent - agent = Agent.get_cls(args.agent_cls)(llm=LLM(LLMConfig())) + agent = Agent.get_cls(os.getenv('DEFAULT_AGENT'))(llm=LLM(LLMConfig())) # Give an impossible task to do: cannot write a commit message because # the workspace is not a git repo @@ -224,10 +218,8 @@ def test_simple_task_rejection(current_test_name: str): reason='Currently, only ssh sandbox supports stateful tasks', ) def test_ipython_module(current_test_name: str): - args = parse_arguments() - # Create the agent - agent = Agent.get_cls(args.agent_cls)(llm=LLM(LLMConfig())) + agent = Agent.get_cls(os.getenv('DEFAULT_AGENT'))(llm=LLM(LLMConfig())) # Execute the task task = "Install and import pymsgbox==1.0.9 and print it's version in /workspace/test.txt. Do not ask me for confirmation at any point." @@ -265,10 +257,8 @@ def test_ipython_module(current_test_name: str): reason='CodeActAgent/CodeActSWEAgent only supports ssh sandbox which is stateful', ) def test_browse_internet(http_server, current_test_name: str): - args = parse_arguments() - # Create the agent - agent = Agent.get_cls(args.agent_cls)(llm=LLM(LLMConfig())) + agent = Agent.get_cls(os.getenv('DEFAULT_AGENT'))(llm=LLM(LLMConfig())) # Execute the task task = 'Browse localhost:8000, and tell me the ultimate answer to life. Do not ask me for confirmation at any point.' diff --git a/tests/unit/test_event_stream.py b/tests/unit/test_event_stream.py index b6519a38a81a..4c7754c43f92 100644 --- a/tests/unit/test_event_stream.py +++ b/tests/unit/test_event_stream.py @@ -1,31 +1,39 @@ import json +import pathlib +import tempfile import pytest from opendevin.events import EventSource, EventStream -from opendevin.events.action import NullAction +from opendevin.events.action import ( + NullAction, +) from opendevin.events.observation import NullObservation +from opendevin.storage import get_file_store @pytest.fixture -def event_stream(): - event_stream = EventStream('abc') - yield event_stream - - # clear after each test - event_stream.clear() +def temp_dir(monkeypatch): + # get a temporary directory + with tempfile.TemporaryDirectory() as temp_dir: + pathlib.Path().mkdir(parents=True, exist_ok=True) + yield temp_dir def collect_events(stream): return [event for event in stream.get_events()] -def test_basic_flow(event_stream: EventStream): +def test_basic_flow(temp_dir: str): + file_store = get_file_store('local', temp_dir) + event_stream = EventStream('abc', file_store) event_stream.add_event(NullAction(), EventSource.AGENT) assert len(collect_events(event_stream)) == 1 -def test_stream_storage(event_stream: EventStream): +def test_stream_storage(temp_dir: str): + file_store = get_file_store('local', temp_dir) + event_stream = EventStream('abc', file_store) event_stream.add_event(NullObservation(''), EventSource.AGENT) assert len(collect_events(event_stream)) == 1 content = event_stream._file_store.read('sessions/abc/events/0.json') @@ -43,15 +51,17 @@ def test_stream_storage(event_stream: EventStream): } -def test_rehydration(event_stream: EventStream): +def test_rehydration(temp_dir: str): + file_store = get_file_store('local', temp_dir) + event_stream = EventStream('abc', file_store) event_stream.add_event(NullObservation('obs1'), EventSource.AGENT) event_stream.add_event(NullObservation('obs2'), EventSource.AGENT) assert len(collect_events(event_stream)) == 2 - stream2 = EventStream('es2') + stream2 = EventStream('es2', file_store) assert len(collect_events(stream2)) == 0 - stream1rehydrated = EventStream('abc') + stream1rehydrated = EventStream('abc', file_store) events = collect_events(stream1rehydrated) assert len(events) == 2 assert events[0].content == 'obs1' diff --git a/tests/unit/test_ipython.py b/tests/unit/test_ipython.py index 6c716000a1a6..f1444a5ee8da 100644 --- a/tests/unit/test_ipython.py +++ b/tests/unit/test_ipython.py @@ -4,7 +4,7 @@ import pytest -from opendevin.core.config import SandboxConfig +from opendevin.core.config import AppConfig, SandboxConfig from opendevin.events.action import IPythonRunCellAction from opendevin.events.observation import IPythonRunCellObservation from opendevin.runtime.server.runtime import ServerRuntime @@ -42,7 +42,9 @@ async def test_run_python_backticks(): ): # Initialize the runtime with the mock event_stream runtime = ServerRuntime( - sandbox_config=SandboxConfig(box_type='ssh', persist_sandbox=False), + config=AppConfig( + persist_sandbox=False, sandbox=SandboxConfig(box_type='ssh') + ), event_stream=mock_event_stream, ) diff --git a/tests/unit/test_is_stuck.py b/tests/unit/test_is_stuck.py index e160d9fb11c0..b4154e709881 100644 --- a/tests/unit/test_is_stuck.py +++ b/tests/unit/test_is_stuck.py @@ -1,4 +1,5 @@ import logging +import tempfile from unittest.mock import Mock, patch import pytest @@ -17,6 +18,7 @@ from opendevin.events.observation.error import ErrorObservation from opendevin.events.stream import EventSource, EventStream from opendevin.memory.history import ShortTermHistory +from opendevin.storage import get_file_store def collect_events(stream): @@ -28,11 +30,13 @@ def collect_events(stream): @pytest.fixture def event_stream(): - event_stream = EventStream('asdf') - yield event_stream + with tempfile.TemporaryDirectory() as temp_dir: + file_store = get_file_store('local', temp_dir) + event_stream = EventStream('asdf', file_store) + yield event_stream - # clear after each test - event_stream.clear() + # clear after each test + event_stream.clear() class TestStuckDetector: diff --git a/tests/unit/test_micro_agents.py b/tests/unit/test_micro_agents.py index e6b66de8ff64..3c474aca140e 100644 --- a/tests/unit/test_micro_agents.py +++ b/tests/unit/test_micro_agents.py @@ -1,5 +1,6 @@ import json import os +import tempfile from unittest.mock import MagicMock import pytest @@ -12,15 +13,18 @@ from opendevin.events.action import MessageAction from opendevin.events.stream import EventStream from opendevin.memory.history import ShortTermHistory +from opendevin.storage import get_file_store @pytest.fixture def event_stream(): - event_stream = EventStream('asdf') - yield event_stream + with tempfile.TemporaryDirectory() as temp_dir: + file_store = get_file_store('local', temp_dir) + event_stream = EventStream('asdf', file_store) + yield event_stream - # clear after each test - event_stream.clear() + # clear after each test + event_stream.clear() def test_all_agents_are_loaded(): diff --git a/tests/unit/test_runtime.py b/tests/unit/test_runtime.py index 0ad9e16da9f8..b7c24495ee93 100644 --- a/tests/unit/test_runtime.py +++ b/tests/unit/test_runtime.py @@ -9,7 +9,7 @@ import pytest -from opendevin.core.config import SandboxConfig +from opendevin.core.config import AppConfig, SandboxConfig from opendevin.core.logger import opendevin_logger as logger from opendevin.events import EventStream from opendevin.events.action import ( @@ -21,6 +21,7 @@ from opendevin.runtime.client.runtime import EventStreamRuntime from opendevin.runtime.plugins import AgentSkillsRequirement, JupyterRequirement from opendevin.runtime.server.runtime import ServerRuntime +from opendevin.storage import get_file_store @pytest.fixture(autouse=True) @@ -46,23 +47,31 @@ def box_class(request): return request.param -async def _load_runtime(box_class, event_stream): +async def _load_runtime(temp_dir, box_class): sid = 'test' + cli_session = 'main_test' plugins = [JupyterRequirement(), AgentSkillsRequirement()] - sandbox_config = SandboxConfig( - use_host_network=True, + config = AppConfig( + workspace_base=temp_dir, + workspace_mount_path=temp_dir, + sandbox=SandboxConfig( + use_host_network=True, + ), ) - container_image = sandbox_config.container_image + file_store = get_file_store(config.file_store, config.file_store_path) + event_stream = EventStream(cli_session, file_store) + + container_image = config.sandbox.container_image # NOTE: we will use the default container image specified in the config.sandbox # if it is an official od_runtime image. if 'od_runtime' not in container_image: container_image = 'ubuntu:22.04' logger.warning( - f'`{sandbox_config.container_image}` is not an od_runtime image. Will use `{container_image}` as the container image for testing.' + f'`{config.sandbox.container_image}` is not an od_runtime image. Will use `{container_image}` as the container image for testing.' ) if box_class == EventStreamRuntime: runtime = EventStreamRuntime( - sandbox_config=sandbox_config, + config=config, event_stream=event_stream, sid=sid, # NOTE: we probably don't have a default container image `/sandbox` for the event stream runtime @@ -72,9 +81,7 @@ async def _load_runtime(box_class, event_stream): ) await runtime.ainit() elif box_class == ServerRuntime: - runtime = ServerRuntime( - sandbox_config=sandbox_config, event_stream=event_stream, sid=sid - ) + runtime = ServerRuntime(config=config, event_stream=event_stream, sid=sid) await runtime.ainit() runtime.init_sandbox_plugins(plugins) runtime.init_runtime_tools( @@ -89,12 +96,9 @@ async def _load_runtime(box_class, event_stream): @pytest.mark.asyncio -async def test_env_vars_os_environ(box_class): +async def test_env_vars_os_environ(temp_dir, box_class): with patch.dict(os.environ, {'SANDBOX_ENV_FOOBAR': 'BAZ'}): - cli_session = 'main_test' - - event_stream = EventStream(cli_session) - runtime = await _load_runtime(box_class, event_stream) + runtime = await _load_runtime(temp_dir, box_class) obs: CmdOutputObservation = await runtime.run_action( CmdRunAction(command='env') @@ -115,11 +119,8 @@ async def test_env_vars_os_environ(box_class): @pytest.mark.asyncio -async def test_env_vars_runtime_add_env_vars(box_class): - cli_session = 'main_test' - - event_stream = EventStream(cli_session) - runtime = await _load_runtime(box_class, event_stream) +async def test_env_vars_runtime_add_env_vars(temp_dir, box_class): + runtime = await _load_runtime(temp_dir, box_class) await runtime.add_env_vars({'QUUX': 'abc"def'}) obs: CmdOutputObservation = await runtime.run_action( @@ -136,11 +137,8 @@ async def test_env_vars_runtime_add_env_vars(box_class): @pytest.mark.asyncio -async def test_env_vars_runtime_add_empty_dict(box_class): - cli_session = 'main_test' - - event_stream = EventStream(cli_session) - runtime = await _load_runtime(box_class, event_stream) +async def test_env_vars_runtime_add_empty_dict(temp_dir, box_class): + runtime = await _load_runtime(temp_dir, box_class) prev_obs = await runtime.run_action(CmdRunAction(command='env')) assert prev_obs.exit_code == 0, 'The exit code should be 0.' @@ -160,11 +158,8 @@ async def test_env_vars_runtime_add_empty_dict(box_class): @pytest.mark.asyncio -async def test_env_vars_runtime_add_multiple_env_vars(box_class): - cli_session = 'main_test' - - event_stream = EventStream(cli_session) - runtime = await _load_runtime(box_class, event_stream) +async def test_env_vars_runtime_add_multiple_env_vars(temp_dir, box_class): + runtime = await _load_runtime(temp_dir, box_class) await runtime.add_env_vars({'QUUX': 'abc"def', 'FOOBAR': 'xyz'}) obs: CmdOutputObservation = await runtime.run_action( @@ -181,12 +176,9 @@ async def test_env_vars_runtime_add_multiple_env_vars(box_class): @pytest.mark.asyncio -async def test_env_vars_runtime_add_env_vars_overwrite(box_class): - cli_session = 'main_test' - +async def test_env_vars_runtime_add_env_vars_overwrite(temp_dir, box_class): with patch.dict(os.environ, {'SANDBOX_ENV_FOOBAR': 'BAZ'}): - event_stream = EventStream(cli_session) - runtime = await _load_runtime(box_class, event_stream) + runtime = await _load_runtime(temp_dir, box_class) await runtime.add_env_vars({'FOOBAR': 'xyz'}) obs: CmdOutputObservation = await runtime.run_action( @@ -204,10 +196,7 @@ async def test_env_vars_runtime_add_env_vars_overwrite(box_class): @pytest.mark.asyncio async def test_bash_command_pexcept(temp_dir, box_class): - cli_session = 'main_test' - - event_stream = EventStream(cli_session) - runtime = await _load_runtime(box_class, event_stream) + runtime = await _load_runtime(temp_dir, box_class) # We set env var PS1="\u@\h:\w $" # and construct the PEXCEPT prompt base on it. diff --git a/tests/unit/test_sandbox.py b/tests/unit/test_sandbox.py index e37b5b1a01e3..eebcdf037d1f 100644 --- a/tests/unit/test_sandbox.py +++ b/tests/unit/test_sandbox.py @@ -11,14 +11,14 @@ def create_docker_box_from_app_config( - path: str, config: AppConfig = None + path: str, config: AppConfig | None = None ) -> DockerSSHBox: if config is None: config = AppConfig( sandbox=SandboxConfig( box_type='ssh', - persist_sandbox=False, - ) + ), + persist_sandbox=False, ) return DockerSSHBox( config=config.sandbox, @@ -305,9 +305,9 @@ def test_sandbox_jupyter_agentskills_fileop_pwd(temp_dir): config = AppConfig( sandbox=SandboxConfig( box_type='ssh', - persist_sandbox=False, enable_auto_lint=False, - ) + ), + persist_sandbox=False, ) assert not config.sandbox.enable_auto_lint box = create_docker_box_from_app_config(temp_dir, config) @@ -324,9 +324,9 @@ def test_agnostic_sandbox_jupyter_agentskills_fileop_pwd(temp_dir): sandbox=SandboxConfig( box_type='ssh', container_image=base_sandbox_image, - persist_sandbox=False, enable_auto_lint=False, - ) + ), + persist_sandbox=False, ) assert not config.sandbox.enable_auto_lint box = create_docker_box_from_app_config(temp_dir, config) @@ -337,11 +337,20 @@ def test_sandbox_jupyter_plugin_backticks(temp_dir): config = AppConfig( sandbox=SandboxConfig( box_type='ssh', - persist_sandbox=False, - enable_auto_lint=False, - ) + ), + persist_sandbox=False, + ) + box = DockerSSHBox( + config=config.sandbox, + persist_sandbox=config.persist_sandbox, + workspace_mount_path=temp_dir, + sandbox_workspace_dir=config.workspace_mount_path_in_sandbox, + cache_dir=config.cache_dir, + run_as_devin=True, + ssh_hostname=config.ssh_hostname, + ssh_password=config.ssh_password, + ssh_port=config.ssh_port, ) - box = create_docker_box_from_app_config(temp_dir, config) box.init_plugins([JupyterRequirement]) test_code = "print('Hello, `World`!')" expected_write_command = (