From 2de75d478285adbd34ef6754e221450f3631f1ac Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Thu, 21 Mar 2024 21:54:56 +0800 Subject: [PATCH] Minimal Docker Sandbox with GPT-3.5 Execution Example (#48) * minimal docker sandbox * make container_image as an argument (fall back to ubuntu); increase timeout to avoid return too early for long running commands; * add a minimal working (imperfect) example * fix typo * change default container name * attempt to fix "Bad file descriptor" error * handle ctrl+D * add Python gitignore * push sandbox to shared dockerhub for ease of use * move codeact example into research folder * add README for opendevin * change container image name to opendevin dockerhub * move folder; change example to a more general agent * update Message and Role * update docker sandbox to support mounting folder and switch to user with correct permission * make network as host * handle erorrs when attrs are not set yet * convert codeact agent into a compatible agent * add workspace to gitignore * make sure the agent interface adjustment works for langchain_agent --- .gitignore | 2 +- agenthub/__init__.py | 1 + agenthub/codeact_agent/README.md | 21 +++ agenthub/codeact_agent/__init__.py | 124 ++++++++++++++++++ agenthub/langchains_agent/__init__.py | 3 + agenthub/langchains_agent/requirements.txt | 2 + opendevin/README.md | 18 +++ opendevin/agent.py | 37 ++++-- opendevin/main.py | 11 +- opendevin/sandbox/Dockerfile | 20 +++ opendevin/sandbox/docker.py | 145 +++++++++++++++++++++ requirements.txt | 2 + 12 files changed, 373 insertions(+), 13 deletions(-) create mode 100644 agenthub/codeact_agent/README.md create mode 100644 agenthub/codeact_agent/__init__.py create mode 100644 opendevin/README.md create mode 100644 opendevin/sandbox/Dockerfile create mode 100644 opendevin/sandbox/docker.py create mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore index 5e212b253169..f7fd501a1be0 100644 --- a/.gitignore +++ b/.gitignore @@ -187,4 +187,4 @@ 
yarn-error.log* # agent .envrc -agent/workspace +/workspace diff --git a/agenthub/__init__.py b/agenthub/__init__.py index ced5095ca3cc..781c578b91c3 100644 --- a/agenthub/__init__.py +++ b/agenthub/__init__.py @@ -1 +1,2 @@ from . import langchains_agent +from . import codeact_agent diff --git a/agenthub/codeact_agent/README.md b/agenthub/codeact_agent/README.md new file mode 100644 index 000000000000..7be79deed47e --- /dev/null +++ b/agenthub/codeact_agent/README.md @@ -0,0 +1,21 @@ +# CodeAct-based Agent Framework + +This folder implements the [CodeAct idea](https://arxiv.org/abs/2402.13463) that relies on an LLM to autonomously perform actions in a Bash shell. It requires more from the LLM itself: the LLM needs to be capable enough to do everything autonomously, instead of getting stuck in an infinite loop. + +A minimalistic example can be found at [research/codeact/examples/run_flask_server_with_bash.py](./examples/run_flask_server_with_bash.py): + +```bash +mkdir workspace +PYTHONPATH=`pwd`:$PYTHONPATH python3 opendevin/main.py -d ./workspace -c CodeActAgent -t "Please write a flask app that returns 'Hello, World\!' at the root URL, then start the app on port 5000. python3 has already been installed for you." +``` + + +Example: prompting `gpt-3.5-turbo-0125` to write a flask server, install the `flask` library, and start the server. + +image + +image + +Most things work as expected, except that at the end the model did not follow the instruction to stop the interaction by outputting ` exit `. + +**TODO**: This should be fixable by either (1) including a complete in-context example like [this](https://github.com/xingyaoww/mint-bench/blob/main/mint/tasks/in_context_examples/reasoning/with_tool.txt), OR (2) collecting some interaction data like this and fine-tuning a model (like [this](https://github.com/xingyaoww/code-act), a more complex route). 
diff --git a/agenthub/codeact_agent/__init__.py b/agenthub/codeact_agent/__init__.py new file mode 100644 index 000000000000..cc3d0c0cb701 --- /dev/null +++ b/agenthub/codeact_agent/__init__.py @@ -0,0 +1,124 @@ +import os +import re +import argparse +from litellm import completion +from termcolor import colored +from typing import List, Dict + +from opendevin.agent import Agent, Message, Role +from opendevin.sandbox.docker import DockerInteractive + +assert ( + "OPENAI_API_KEY" in os.environ +), "Please set the OPENAI_API_KEY environment variable." + + + +SYSTEM_MESSAGE = """You are a helpful assistant. You will be provided access (as root) to a bash shell to complete user-provided tasks. +You will be able to execute commands in the bash shell, interact with the file system, install packages, and receive the output of your commands. + +DO NOT provide code in ```triple backticks```. Instead, you should execute bash command on behalf of the user by wrapping them with and . +For example: + +You can list the files in the current directory by executing the following command: +ls + +You can also install packages using pip: + pip install numpy + +You can also write a block of code to a file: + +echo "import math +print(math.pi)" > math.py + + +When you are done, execute "exit" to close the shell and end the conversation. +""" + +INVALID_INPUT_MESSAGE = ( + "I don't understand your input. \n" + "If you want to execute command, please use YOUR_COMMAND_HERE .\n" + "If you already completed the task, please exit the shell by generating: exit ." +) + + +def parse_response(response) -> str: + action = response.choices[0].message.content + if "" in action and "" not in action: + action += "" + return action + + +class CodeActAgent(Agent): + def __init__( + self, + instruction: str, + workspace_dir: str, + model_name: str, + max_steps: int = 100 + ) -> None: + """ + Initializes a new instance of the CodeActAgent class. 
+ + Parameters: + - instruction (str): The instruction for the agent to execute. + - max_steps (int): The maximum number of steps to run the agent. + """ + super().__init__(instruction, workspace_dir, model_name, max_steps) + self._history = [Message(Role.SYSTEM, SYSTEM_MESSAGE)] + self._history.append(Message(Role.USER, instruction)) + self.env = DockerInteractive(workspace_dir=workspace_dir) + print(colored("===USER:===\n" + instruction, "green")) + + def _history_to_messages(self) -> List[Dict]: + return [message.to_dict() for message in self._history] + + def run(self) -> None: + """ + Starts the execution of the assigned instruction. This method should + be implemented by subclasses to define the specific execution logic. + """ + for _ in range(self.max_steps): + response = completion( + messages=self._history_to_messages(), + model=self.model_name, + stop=[""], + temperature=0.0, + seed=42, + ) + action = parse_response(response) + self._history.append(Message(Role.ASSISTANT, action)) + print(colored("===ASSISTANT:===\n" + action, "yellow")) + + command = re.search(r"(.*)", action, re.DOTALL) + if command is not None: + # a command was found + command = command.group(1) + if command.strip() == "exit": + print(colored("Exit received. Exiting...", "red")) + break + # execute the code + observation = self.env.execute(command) + self._history.append(Message(Role.ASSISTANT, observation)) + print(colored("===ENV OBSERVATION:===\n" + observation, "blue")) + else: + # we could provide a error message for the model to continue similar to + # https://github.com/xingyaoww/mint-bench/blob/main/mint/envs/general_env.py#L18-L23 + observation = INVALID_INPUT_MESSAGE + self._history.append(Message(Role.ASSISTANT, observation)) + print(colored("===ENV OBSERVATION:===\n" + observation, "blue")) + + self.env.close() + + def chat(self, message: str) -> None: + """ + Optional method for interactive communication with the agent during its execution. 
Implementations + can use this method to modify the agent's behavior or state based on chat inputs. + + Parameters: + - message (str): The chat message or command. + """ + raise NotImplementedError + + +Agent.register("CodeActAgent", CodeActAgent) diff --git a/agenthub/langchains_agent/__init__.py b/agenthub/langchains_agent/__init__.py index aa72d803517d..cc48f550d373 100644 --- a/agenthub/langchains_agent/__init__.py +++ b/agenthub/langchains_agent/__init__.py @@ -69,6 +69,9 @@ def run(self) -> None: Starts the execution of the assigned instruction. This method should be implemented by subclasses to define the specific execution logic. """ + print("Working in directory:", self.workspace_dir) + os.chdir(self.workspace_dir) + agent = LangchainsAgentImpl(self.instruction) next_is_output = False for thought in INITIAL_THOUGHTS: diff --git a/agenthub/langchains_agent/requirements.txt b/agenthub/langchains_agent/requirements.txt index 48755f89b969..b2dcf5377cb5 100644 --- a/agenthub/langchains_agent/requirements.txt +++ b/agenthub/langchains_agent/requirements.txt @@ -4,3 +4,5 @@ langchain-community llama-index llama-index-vector-stores-chroma chromadb +litellm +termcolor diff --git a/opendevin/README.md b/opendevin/README.md new file mode 100644 index 000000000000..c44cc05d2b43 --- /dev/null +++ b/opendevin/README.md @@ -0,0 +1,18 @@ +# OpenDevin Shared Abstraction and Components + +This is a Python package that contains all the shared abstractions (e.g., Agent) and components (e.g., sandbox, web browser, search API, selenium). + +## Sandbox component + +Run the docker-based sandbox interactively: + +```bash +mkdir workspace +python3 opendevin/sandbox/docker.py -d workspace +``` + +It will map `./workspace` into the docker container with the folder permissions correctly adjusted for the current user. 
+ +Example screenshot: + +image diff --git a/opendevin/agent.py b/opendevin/agent.py index 41fa9699978d..ca779240ad98 100644 --- a/opendevin/agent.py +++ b/opendevin/agent.py @@ -5,11 +5,11 @@ class Role(Enum): + SYSTEM = "system" # system message for LLM USER = "user" # the user ASSISTANT = "assistant" # the agent ENVIRONMENT = "environment" # the environment (e.g., bash shell, web browser, etc.) - @dataclass class Message: """ @@ -20,6 +20,20 @@ class Message: content: str # TODO: add more fields as needed + def to_dict(self) -> Dict: + """ + Converts the message to a dictionary (OpenAI chat-completion format). + + Returns: + - message (Dict): A dictionary representation of the message. + """ + role = self.role.value + content = self.content + if self.role == Role.ENVIRONMENT: + content = f"Environment Observation:\n{content}" + role = "user" # treat environment messages as user messages + return {"role": role, "content": content} + class Agent(ABC): """ @@ -27,6 +41,11 @@ class Agent(ABC): executing a specific instruction and allowing human interaction with the agent during execution. It tracks the execution status and maintains a history of interactions. + + :param instruction: The instruction for the agent to execute. + :param workspace_dir: The working directory for the agent. + :param model_name: The litellm name of the model to use for the agent. + :param max_steps: The maximum number of steps to run the agent. 
""" _registry: Dict[str, Type['Agent']] = {} @@ -34,9 +53,13 @@ class Agent(ABC): def __init__( self, instruction: str, + workspace_dir: str, + model_name: str, max_steps: int = 100 ): self.instruction = instruction + self.workspace_dir = workspace_dir + self.model_name = model_name self.max_steps = max_steps self._complete = False @@ -105,18 +128,16 @@ def register(cls, name: str, agent_cls: Type['Agent']): cls._registry[name] = agent_cls @classmethod - def create_instance(cls, name: str, instruction: str) -> 'Agent': + def get_cls(cls, name: str) -> Type['Agent']: """ - Creates an instance of a registered agent class based on the given name. + Retrieves an agent class from the registry. Parameters: - - name (str): The name of the agent class to instantiate. - - instruction (str): The instruction for the new agent instance. + - name (str): The name of the class to retrieve Returns: - - An instance of the specified agent class. + - agent_cls (Type['Agent']): The class registered under the specified name. 
""" if name not in cls._registry: raise ValueError(f"No agent class registered under '{name}'.") - agent_cls = cls._registry[name] - return agent_cls(instruction) + return cls._registry[name] diff --git a/opendevin/main.py b/opendevin/main.py index 10c4dba75dd5..c5bc0386ed7c 100644 --- a/opendevin/main.py +++ b/opendevin/main.py @@ -9,10 +9,13 @@ parser.add_argument("-d", "--directory", required=True, type=str, help="The working directory for the agent") parser.add_argument("-t", "--task", required=True, type=str, help="The task for the agent to perform") parser.add_argument("-c", "--agent-cls", default="LangchainsAgent", type=str, help="The agent class to use") + parser.add_argument("-m", "--model-name", default="gpt-3.5-turbo-0125", type=str, help="The (litellm) model name to use") args = parser.parse_args() - print("Working in directory:", args.directory) - os.chdir(args.directory) - - agent = Agent.create_instance(args.agent_cls, args.task) + AgentCls: Agent = Agent.get_cls(args.agent_cls) + agent = AgentCls( + instruction=args.task, + workspace_dir=args.directory, + model_name=args.model_name + ) agent.run() diff --git a/opendevin/sandbox/Dockerfile b/opendevin/sandbox/Dockerfile new file mode 100644 index 000000000000..d855985a60a3 --- /dev/null +++ b/opendevin/sandbox/Dockerfile @@ -0,0 +1,20 @@ +FROM ubuntu:22.04 + +# install basic packages +RUN apt-get update && apt-get install -y \ + curl \ + wget \ + git \ + vim \ + nano \ + unzip \ + zip \ + python3 \ + python3-pip \ + python3-venv \ + python3-dev \ + build-essential \ + && rm -rf /var/lib/apt/lists/* + +# docker build -f opendevin/sandbox/Dockerfile -t opendevin/sandbox:v0.1 . 
+# docker push opendevin/sandbox:v0.1 diff --git a/opendevin/sandbox/docker.py b/opendevin/sandbox/docker.py new file mode 100644 index 000000000000..90af388b7643 --- /dev/null +++ b/opendevin/sandbox/docker.py @@ -0,0 +1,145 @@ +import os +import pty +import sys +import uuid +import time +import shlex +import select +import subprocess +from typing import List +from collections import namedtuple + +InputType = namedtuple("InputDtype", ["content"]) +OutputType = namedtuple("OutputDtype", ["content"]) + + +class DockerInteractive: + CONTAINER_IMAGE = "opendevin/sandbox:latest" + + def __init__( + self, + workspace_dir: str = None, + container_image: str = None, + timeout: int = 5 + ): + self.instance_id: str = uuid.uuid4() + if workspace_dir is not None: + assert os.path.exists(workspace_dir), f"Directory {workspace_dir} does not exist." + # expand to absolute path + workspace_dir = os.path.abspath(workspace_dir) + else: + workspace_dir = os.getcwd() + print(f"workspace unspecified, using current directory: {workspace_dir}") + + # TODO: this timeout is actually essential - need a better way to set it + # if it is too short, the container may still waiting for previous + # command to finish (e.g. 
apt-get update) + # if it is too long, the user may have to wait for a unnecessary long time + self.timeout: int = timeout + + if container_image is None: + container_image = self.CONTAINER_IMAGE + + uid = os.getuid() + cmd = ( + f"docker run -it --rm --name sandbox-{self.instance_id} " + f"-v {workspace_dir}:/workspace " + f"-w /workspace " + f"--network=host " + f"{container_image} " + f"/bin/bash -c 'useradd --shell /bin/bash -u {uid} -o -c \"\" -m devin && su devin'" + ) + # print(f"Starting Docker container with command: {cmd}") + self.master_fd, self.slave_fd = pty.openpty() + self.container = subprocess.Popen( + shlex.split(cmd), + stdin=self.slave_fd, + stdout=self.slave_fd, + stderr=self.slave_fd, + text=True, + close_fds=True, + ) + time.sleep(1) # wait for the container to start + # TODO: use a more robust way to check if the container is ready + self.history: List[InputType | OutputType] = [ + OutputType(self._wait_and_read_output()) + ] + + def _wait_and_read_output(self, user_input: str = None) -> str: + output_str = "" + while True: + readable, _, _ = select.select([self.master_fd], [], [], self.timeout) + if readable: + output = os.read(self.master_fd, 1024).decode() + if not output: + break + output_str += output + else: + break + if user_input: + output_str = output_str.lstrip(user_input).lstrip() + return output_str + + def execute(self, cmd: str) -> str: + os.write(self.master_fd, (cmd + "\n").encode()) + self.history.append(InputType(cmd)) + + output = self._wait_and_read_output(cmd) + self.history.append(OutputType(output)) + return output + + def close(self): + if hasattr(self, "master_fd") and self.master_fd is not None: + os.close(self.master_fd) + self.master_fd = None + + if hasattr(self, "container") and self.container is not None: + self.container.terminate() + try: + self.container.wait(timeout=5) + print("Container stopped.") + except subprocess.TimeoutExpired: + self.container.kill() + print("Container killed.") + self.container = 
None + + def __del__(self): + self.close() + +if __name__ == "__main__": + import argparse + parser = argparse.ArgumentParser(description="Interactive Docker container") + parser.add_argument( + "-d", + "--directory", + type=str, + default=None, + help="The directory to mount as the workspace in the Docker container.", + ) + args = parser.parse_args() + + docker_interactive = DockerInteractive( + workspace_dir=args.directory, + container_image="opendevin/sandbox:latest", + ) + print("Interactive Docker container started. Type 'exit' or use Ctrl+C to exit.") + + for item in docker_interactive.history: + print(item.content, end="") + sys.stdout.flush() + try: + while True: + try: + user_input = input() + except EOFError: + print("\nExiting...") + break + if user_input.lower() == "exit": + print(f"Exiting...") + break + output = docker_interactive.execute(user_input) + print(output, end="") + sys.stdout.flush() + except KeyboardInterrupt: + print("\nExiting...") + docker_interactive.close() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 000000000000..700ec2d821f6 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +litellm +termcolor