From 2de75d478285adbd34ef6754e221450f3631f1ac Mon Sep 17 00:00:00 2001
From: Xingyao Wang <xingyao6@illinois.edu>
Date: Thu, 21 Mar 2024 21:54:56 +0800
Subject: [PATCH] Minimal Docker Sandbox with GPT-3.5 Execution Example (#48)

* minimal docker sandbox

* make container_image as an argument (fall back to ubuntu);
increase timeout to avoid return too early for long running commands;

* add a minimal working (imperfect) example

* fix typo

* change default container name

* attempt to fix "Bad file descriptor" error

* handle ctrl+D

* add Python gitignore

* push sandbox to shared dockerhub for ease of use

* move codeact example into research folder

* add README for opendevin

* change container image name to opendevin dockerhub

* move folder; change example to a more general agent

* update Message and Role

* update docker sandbox to support mounting folder and switch to user with correct permission

* make network as host

* handle erorrs when attrs are not set yet

* convert codeact agent into a compatible agent

* add workspace to gitignore

* make sure the agent interface adjustment works for langchain_agent
---
 .gitignore                                 |   2 +-
 agenthub/__init__.py                       |   1 +
 agenthub/codeact_agent/README.md           |  21 +++
 agenthub/codeact_agent/__init__.py         | 124 ++++++++++++++++++
 agenthub/langchains_agent/__init__.py      |   3 +
 agenthub/langchains_agent/requirements.txt |   2 +
 opendevin/README.md                        |  18 +++
 opendevin/agent.py                         |  37 ++++--
 opendevin/main.py                          |  11 +-
 opendevin/sandbox/Dockerfile               |  20 +++
 opendevin/sandbox/docker.py                | 145 +++++++++++++++++++++
 requirements.txt                           |   2 +
 12 files changed, 373 insertions(+), 13 deletions(-)
 create mode 100644 agenthub/codeact_agent/README.md
 create mode 100644 agenthub/codeact_agent/__init__.py
 create mode 100644 opendevin/README.md
 create mode 100644 opendevin/sandbox/Dockerfile
 create mode 100644 opendevin/sandbox/docker.py
 create mode 100644 requirements.txt
diff --git a/.gitignore b/.gitignore
index 5e212b253169..f7fd501a1be0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -187,4 +187,4 @@ yarn-error.log*
 
 # agent
 .envrc
-agent/workspace
+/workspace
diff --git a/agenthub/__init__.py b/agenthub/__init__.py
index ced5095ca3cc..781c578b91c3 100644
--- a/agenthub/__init__.py
+++ b/agenthub/__init__.py
@@ -1 +1,2 @@
 from . import langchains_agent
+from . import codeact_agent
diff --git a/agenthub/codeact_agent/README.md b/agenthub/codeact_agent/README.md
new file mode 100644
index 000000000000..7be79deed47e
--- /dev/null
+++ b/agenthub/codeact_agent/README.md
@@ -0,0 +1,21 @@
+# CodeAct-based Agent Framework
+
+This folder implements the [CodeAct idea](https://arxiv.org/abs/2402.13463) that relies on LLM to autonomously perform actions in a Bash shell. It requires more from the LLM itself: LLM needs to be capable enough to do all the stuff autonomously, instead of stuck in an infinite loop. 
+
+A minimalistic exmaple can be found at [research/codeact/examples/run_flask_server_with_bash.py](./examples/run_flask_server_with_bash.py):
+
+```bash
+mkdir workspace
+PYTHONPATH=`pwd`:$PYTHONPATH python3 opendevin/main.py -d ./workspace -c CodeActAgent -t "Please write a flask app that returns 'Hello, World\!' at the root URL, then start the app on port 5000. python3 has already been installed for you."
+```
+
+
+Example: prompts `gpt-3.5-turbo-0125` to write a flask server, install `flask` library, and start the server.
+
+<img width="951" alt="image" src="https://github.com/OpenDevin/OpenDevin/assets/38853559/325c3115-a343-4cc5-a92b-f1e5d552a077">
+
+<img width="957" alt="image" src="https://github.com/OpenDevin/OpenDevin/assets/38853559/68ad10c1-744a-4e9d-bb29-0f163d665a0a">
+
+Most of the things are working as expected, except at the end, the model did not follow the instruction to stop the interaction by outputting `<execute> exit </execute>` as instructed. 
+
+**TODO**: This should be fixable by either (1) including a complete in-context example like [this](https://github.com/xingyaoww/mint-bench/blob/main/mint/tasks/in_context_examples/reasoning/with_tool.txt), OR (2) collect some interaction data like this and fine-tune a model (like [this](https://github.com/xingyaoww/code-act), a more complex route).
diff --git a/agenthub/codeact_agent/__init__.py b/agenthub/codeact_agent/__init__.py
new file mode 100644
index 000000000000..cc3d0c0cb701
--- /dev/null
+++ b/agenthub/codeact_agent/__init__.py
@@ -0,0 +1,124 @@
+import os
+import re
+import argparse
+from litellm import completion
+from termcolor import colored
+from typing import List, Dict
+
+from opendevin.agent import Agent, Message, Role
+from opendevin.sandbox.docker import DockerInteractive
+
+assert (
+    "OPENAI_API_KEY" in os.environ
+), "Please set the OPENAI_API_KEY environment variable."
+
+
+
+SYSTEM_MESSAGE = """You are a helpful assistant. You will be provided access (as root) to a bash shell to complete user-provided tasks.
+You will be able to execute commands in the bash shell, interact with the file system, install packages, and receive the output of your commands.
+
+DO NOT provide code in ```triple backticks```. Instead, you should execute bash command on behalf of the user by wrapping them with <execute> and </execute>.
+For example:
+
+You can list the files in the current directory by executing the following command:
+<execute>ls</execute>
+
+You can also install packages using pip:
+<execute> pip install numpy </execute>
+
+You can also write a block of code to a file:
+<execute>
+echo "import math
+print(math.pi)" > math.py
+</execute>
+
+When you are done, execute "exit" to close the shell and end the conversation.
+"""
+
+INVALID_INPUT_MESSAGE = (
+    "I don't understand your input. \n"
+    "If you want to execute command, please use <execute> YOUR_COMMAND_HERE </execute>.\n"
+    "If you already completed the task, please exit the shell by generating: <execute> exit </execute>."
+)
+
+
+def parse_response(response) -> str:
+    action = response.choices[0].message.content
+    if "<execute>" in action and "</execute>" not in action:
+        action += "</execute>"
+    return action
+
+
+class CodeActAgent(Agent):
+    def __init__(
+        self,
+        instruction: str,
+        workspace_dir: str,
+        model_name: str,
+        max_steps: int = 100
+    ) -> None:
+        """
+        Initializes a new instance of the CodeActAgent class.
+
+        Parameters:
+        - instruction (str): The instruction for the agent to execute.
+        - max_steps (int): The maximum number of steps to run the agent.
+        """
+        super().__init__(instruction, workspace_dir, model_name, max_steps)
+        self._history = [Message(Role.SYSTEM, SYSTEM_MESSAGE)]
+        self._history.append(Message(Role.USER, instruction))
+        self.env = DockerInteractive(workspace_dir=workspace_dir)
+        print(colored("===USER:===\n" + instruction, "green"))
+
+    def _history_to_messages(self) -> List[Dict]:
+        return [message.to_dict() for message in self._history]
+
+    def run(self) -> None:
+        """
+        Starts the execution of the assigned instruction. This method should
+        be implemented by subclasses to define the specific execution logic.
+        """
+        for _ in range(self.max_steps):
+            response = completion(
+                messages=self._history_to_messages(),
+                model=self.model_name,
+                stop=["</execute>"],
+                temperature=0.0,
+                seed=42,
+            )
+            action = parse_response(response)
+            self._history.append(Message(Role.ASSISTANT, action))
+            print(colored("===ASSISTANT:===\n" + action, "yellow"))
+
+            command = re.search(r"<execute>(.*)</execute>", action, re.DOTALL)
+            if command is not None:
+                # a command was found
+                command = command.group(1)
+                if command.strip() == "exit":
+                    print(colored("Exit received. Exiting...", "red"))
+                    break
+                # execute the code
+                observation = self.env.execute(command)
+                self._history.append(Message(Role.ASSISTANT, observation))
+                print(colored("===ENV OBSERVATION:===\n" + observation, "blue"))
+            else:
+                # we could provide a error message for the model to continue similar to
+                # https://github.com/xingyaoww/mint-bench/blob/main/mint/envs/general_env.py#L18-L23
+                observation = INVALID_INPUT_MESSAGE
+                self._history.append(Message(Role.ASSISTANT, observation))
+                print(colored("===ENV OBSERVATION:===\n" + observation, "blue"))
+
+        self.env.close()
+
+    def chat(self, message: str) -> None:
+        """
+        Optional method for interactive communication with the agent during its execution. Implementations
+        can use this method to modify the agent's behavior or state based on chat inputs.
+
+        Parameters:
+        - message (str): The chat message or command.
+        """
+        raise NotImplementedError
+
+
+Agent.register("CodeActAgent", CodeActAgent)
diff --git a/agenthub/langchains_agent/__init__.py b/agenthub/langchains_agent/__init__.py
index aa72d803517d..cc48f550d373 100644
--- a/agenthub/langchains_agent/__init__.py
+++ b/agenthub/langchains_agent/__init__.py
@@ -69,6 +69,9 @@ def run(self) -> None:
         Starts the execution of the assigned instruction. This method should
         be implemented by subclasses to define the specific execution logic.
         """
+        print("Working in directory:", self.workspace_dir)
+        os.chdir(self.workspace_dir)
+
         agent = LangchainsAgentImpl(self.instruction)
         next_is_output = False
         for thought in INITIAL_THOUGHTS:
diff --git a/agenthub/langchains_agent/requirements.txt b/agenthub/langchains_agent/requirements.txt
index 48755f89b969..b2dcf5377cb5 100644
--- a/agenthub/langchains_agent/requirements.txt
+++ b/agenthub/langchains_agent/requirements.txt
@@ -4,3 +4,5 @@ langchain-community
 llama-index
 llama-index-vector-stores-chroma
 chromadb
+litellm
+termcolor
diff --git a/opendevin/README.md b/opendevin/README.md
new file mode 100644
index 000000000000..c44cc05d2b43
--- /dev/null
+++ b/opendevin/README.md
@@ -0,0 +1,18 @@
+# OpenDevin Shared Abstraction and Components
+
+This is a Python package that contains all the shared abstraction (e.g., Agent) and components (e.g., sandbox, web browser, search API, selenium).
+
+## Sandbox component
+
+Run the docker-based sandbox interactive:
+
+```bash
+mkdir workspace
+python3 opendevin/sandbox/docker.py -d workspace
+```
+
+It will map `./workspace` into the docker container with the folder permission correctly adjusted for current user.
+
+Example screenshot:
+
+<img width="868" alt="image" src="https://github.com/OpenDevin/OpenDevin/assets/38853559/8dedcdee-437a-4469-870f-be29ca2b7c32">
diff --git a/opendevin/agent.py b/opendevin/agent.py
index 41fa9699978d..ca779240ad98 100644
--- a/opendevin/agent.py
+++ b/opendevin/agent.py
@@ -5,11 +5,11 @@
 
 
 class Role(Enum):
+    SYSTEM = "system"  # system message for LLM
     USER = "user"  # the user
     ASSISTANT = "assistant"  # the agent
     ENVIRONMENT = "environment"  # the environment (e.g., bash shell, web browser, etc.)
 
-
 @dataclass
 class Message:
     """
@@ -20,6 +20,20 @@ class Message:
     content: str
     # TODO: add more fields as needed
 
+    def to_dict(self) -> Dict:
+        """
+        Converts the message to a dictionary (OpenAI chat-completion format).
+
+        Returns:
+        - message (Dict): A dictionary representation of the message.
+        """
+        role = self.role.value
+        content = self.content
+        if self.role == Role.ENVIRONMENT:
+            content = f"Environment Observation:\n{content}"
+            role = "user"  # treat environment messages as user messages
+        return {"role": role, "content": content}
+
 
 class Agent(ABC):
     """
@@ -27,6 +41,11 @@ class Agent(ABC):
     executing a specific instruction and allowing human interaction with the
     agent during execution.
     It tracks the execution status and maintains a history of interactions.
+
+    :param instruction: The instruction for the agent to execute.
+    :param workspace_dir: The working directory for the agent.
+    :param model_name: The litellm name of the model to use for the agent.
+    :param max_steps: The maximum number of steps to run the agent.
     """
 
     _registry: Dict[str, Type['Agent']] = {}
@@ -34,9 +53,13 @@ class Agent(ABC):
     def __init__(
         self,
         instruction: str,
+        workspace_dir: str,
+        model_name: str,
         max_steps: int = 100
     ):
         self.instruction = instruction
+        self.workspace_dir = workspace_dir
+        self.model_name = model_name
         self.max_steps = max_steps
 
         self._complete = False
@@ -105,18 +128,16 @@ def register(cls, name: str, agent_cls: Type['Agent']):
         cls._registry[name] = agent_cls
 
     @classmethod
-    def create_instance(cls, name: str, instruction: str) -> 'Agent':
+    def get_cls(cls, name: str) -> Type['Agent']:
         """
-        Creates an instance of a registered agent class based on the given name.
+        Retrieves an agent class from the registry.
 
         Parameters:
-        - name (str): The name of the agent class to instantiate.
-        - instruction (str): The instruction for the new agent instance.
+        - name (str): The name of the class to retrieve
 
         Returns:
-        - An instance of the specified agent class.
+        - agent_cls (Type['Agent']): The class registered under the specified name.
         """
         if name not in cls._registry:
             raise ValueError(f"No agent class registered under '{name}'.")
-        agent_cls = cls._registry[name]
-        return agent_cls(instruction)
+        return cls._registry[name]
diff --git a/opendevin/main.py b/opendevin/main.py
index 10c4dba75dd5..c5bc0386ed7c 100644
--- a/opendevin/main.py
+++ b/opendevin/main.py
@@ -9,10 +9,13 @@
     parser.add_argument("-d", "--directory", required=True, type=str, help="The working directory for the agent")
     parser.add_argument("-t", "--task", required=True, type=str, help="The task for the agent to perform")
     parser.add_argument("-c", "--agent-cls", default="LangchainsAgent", type=str, help="The agent class to use")
+    parser.add_argument("-m", "--model-name", default="gpt-3.5-turbo-0125", type=str, help="The (litellm) model name to use")
     args = parser.parse_args()
 
-    print("Working in directory:", args.directory)
-    os.chdir(args.directory)
-
-    agent = Agent.create_instance(args.agent_cls, args.task)
+    AgentCls: Agent = Agent.get_cls(args.agent_cls)
+    agent = AgentCls(
+        instruction=args.task,
+        workspace_dir=args.directory,
+        model_name=args.model_name
+    )
     agent.run()
diff --git a/opendevin/sandbox/Dockerfile b/opendevin/sandbox/Dockerfile
new file mode 100644
index 000000000000..d855985a60a3
--- /dev/null
+++ b/opendevin/sandbox/Dockerfile
@@ -0,0 +1,20 @@
+FROM ubuntu:22.04
+
+# install basic packages
+RUN apt-get update && apt-get install -y \
+    curl \
+    wget \
+    git \
+    vim \
+    nano \
+    unzip \
+    zip \
+    python3 \
+    python3-pip \
+    python3-venv \
+    python3-dev \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+
+# docker build -f opendevin/sandbox/Dockerfile -t opendevin/sandbox:v0.1 .
+# docker push opendevin/sandbox:v0.1
diff --git a/opendevin/sandbox/docker.py b/opendevin/sandbox/docker.py
new file mode 100644
index 000000000000..90af388b7643
--- /dev/null
+++ b/opendevin/sandbox/docker.py
@@ -0,0 +1,145 @@
+import os
+import pty
+import sys
+import uuid
+import time
+import shlex
+import select
+import subprocess
+from typing import List
+from collections import namedtuple
+
+InputType = namedtuple("InputDtype", ["content"])
+OutputType = namedtuple("OutputDtype", ["content"])
+
+
+class DockerInteractive:
+    CONTAINER_IMAGE = "opendevin/sandbox:latest"
+
+    def __init__(
+        self,
+        workspace_dir: str = None,
+        container_image: str = None,
+        timeout: int = 5
+    ):
+        self.instance_id: str = uuid.uuid4()
+        if workspace_dir is not None:
+            assert os.path.exists(workspace_dir), f"Directory {workspace_dir} does not exist."
+            # expand to absolute path
+            workspace_dir = os.path.abspath(workspace_dir)
+        else:
+            workspace_dir = os.getcwd()
+            print(f"workspace unspecified, using current directory: {workspace_dir}")
+        
+        # TODO: this timeout is actually essential - need a better way to set it
+        # if it is too short, the container may still waiting for previous
+        # command to finish (e.g. apt-get update)
+        # if it is too long, the user may have to wait for a unnecessary long time
+        self.timeout: int = timeout
+
+        if container_image is None:
+            container_image = self.CONTAINER_IMAGE
+
+        uid = os.getuid()
+        cmd = (
+            f"docker run -it --rm --name sandbox-{self.instance_id} "
+            f"-v {workspace_dir}:/workspace "
+            f"-w /workspace "
+            f"--network=host "
+            f"{container_image} "
+            f"/bin/bash -c 'useradd --shell /bin/bash -u {uid} -o -c \"\" -m devin && su devin'"
+        )
+        # print(f"Starting Docker container with command: {cmd}")
+        self.master_fd, self.slave_fd = pty.openpty()
+        self.container = subprocess.Popen(
+            shlex.split(cmd),
+            stdin=self.slave_fd,
+            stdout=self.slave_fd,
+            stderr=self.slave_fd,
+            text=True,
+            close_fds=True,
+        )
+        time.sleep(1)  # wait for the container to start
+        # TODO: use a more robust way to check if the container is ready
+        self.history: List[InputType | OutputType] = [
+            OutputType(self._wait_and_read_output())
+        ]
+
+    def _wait_and_read_output(self, user_input: str = None) -> str:
+        output_str = ""
+        while True:
+            readable, _, _ = select.select([self.master_fd], [], [], self.timeout)
+            if readable:
+                output = os.read(self.master_fd, 1024).decode()
+                if not output:
+                    break
+                output_str += output
+            else:
+                break
+        if user_input:
+            output_str = output_str.lstrip(user_input).lstrip()
+        return output_str
+
+    def execute(self, cmd: str) -> str:
+        os.write(self.master_fd, (cmd + "\n").encode())
+        self.history.append(InputType(cmd))
+
+        output = self._wait_and_read_output(cmd)
+        self.history.append(OutputType(output))
+        return output
+
+    def close(self):
+        if hasattr(self, "master_fd") and self.master_fd is not None:
+            os.close(self.master_fd)
+            self.master_fd = None
+        
+        if hasattr(self, "container") and self.container is not None:
+            self.container.terminate()
+            try:
+                self.container.wait(timeout=5)
+                print("Container stopped.")
+            except subprocess.TimeoutExpired:
+                self.container.kill()
+                print("Container killed.")
+            self.container = None
+
+    def __del__(self):
+        self.close()
+
+if __name__ == "__main__":
+    import argparse
+    parser = argparse.ArgumentParser(description="Interactive Docker container")
+    parser.add_argument(
+        "-d",
+        "--directory",
+        type=str,
+        default=None,
+        help="The directory to mount as the workspace in the Docker container.",
+    )
+    args = parser.parse_args()
+
+    docker_interactive = DockerInteractive(
+        workspace_dir=args.directory,
+        container_image="opendevin/sandbox:latest",
+    )
+    print("Interactive Docker container started. Type 'exit' or use Ctrl+C to exit.")
+
+    for item in docker_interactive.history:
+        print(item.content, end="")
+    sys.stdout.flush()
+    try:
+        while True:
+            try:
+                user_input = input()
+            except EOFError:
+                print("\nExiting...")
+                break
+            if user_input.lower() == "exit":
+                print(f"Exiting...")
+                break
+            output = docker_interactive.execute(user_input)
+            print(output, end="")
+            sys.stdout.flush()
+    except KeyboardInterrupt:
+        print("\nExiting...")
+    docker_interactive.close()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 000000000000..700ec2d821f6
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+litellm
+termcolor