New Agent, Action, Observation Abstraction with updated Controller (All-Hands-AI#105)

* rearrange workspace_dir and max_step as arguments to controller

* remove unused output

* abstract each action into dataclass

* move actions

* fix action import

* move cmd manager and change method to private

* move controller

* rename action folder

* add state

* a draft of Controller & new agent abstraction

* add agent actions

* remove controller file

* add observation to perform a refactor on langchains agent

* revert to make this compatible via translation

* fix typo and translation error

* add error to observation

* index thought as dict

* refactor controller

* fix circular dependency caused by type hint

* add runnable attribute to agent

* add mixin to denote executable

* change baseclass

* make file read/write action compatible w/ docker directory

* remove event

* fix some merge issue

* fix sandbox w/ permission issue

* clean up history abstraction since langchains agent is not really using it

* tweak to make langchains agent work

* make all actions return observation

* fix missing import

* add echo action for agent

* add error code to cmd output obs

* make cmd manager return cmd output obs

* fix codeact agent to make it work

* fix all ruff issues

* fix mypy

* add import agenthub back

* add message for Action attribute (migrate from previous event)

* fix typo

* fix instruction setting

* fix instruction setting

* attempt to fix session

* ruff fix

* add .to_dict method for base and observation

* add message for recall

* try to simplify the state_updated_info with tuple of action and obs

* update_info to Tuple[Action, Observation]

* make codeact agent and langchains compatible with Tuple[Action, Observation]

* fix ruff

* fix ruff

* change to base path to fix minimal langchains agent

* add NullAction to potentially handle the chat scenario

* Update opendevin/controller/command_manager.py

Co-authored-by: Robert Brennan <accounts@rbren.io>

* fix event args

* set the default workspace to "workspace"

* make directory relative (so it does not show up to the agent in File*Action)

* fix typo

* change await to yield for sending observations

* fix message format

---------

Co-authored-by: Robert Brennan <accounts@rbren.io>
xingyaoww and rbren committed Mar 25, 2024
1 parent bc9c919 commit 82f934d
Showing 28 changed files with 875 additions and 527 deletions.
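At a high level, this commit replaces the old Event-based interface with typed Action and Observation dataclasses plus a State object that shuttles (action, observation) pairs between the controller and the agent. The snippet below is a minimal illustrative sketch of that shape — the field names and the executable run() hook are assumptions for clarity, not the exact definitions added in this commit:

# Minimal sketch of the new abstraction (illustrative only, not the actual OpenDevin source).
from dataclasses import dataclass, field
from typing import List, Tuple


@dataclass
class Observation:
    content: str  # what the environment reported back


@dataclass
class Action:
    # Executable actions (e.g. running a command) override this; "thought"-style
    # actions are handled by the controller without touching the sandbox.
    def run(self, controller) -> Observation:
        raise NotImplementedError


@dataclass
class CmdRunAction(Action):
    command: str


@dataclass
class State:
    # (action, observation) pairs produced since the agent's last step
    updated_info: List[Tuple[Action, Observation]] = field(default_factory=list)


class Agent:
    def step(self, state: State) -> Action:
        """Consume new observations from the state and decide the next action."""
        raise NotImplementedError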
146 changes: 76 additions & 70 deletions agenthub/codeact_agent/__init__.py
@@ -2,12 +2,21 @@
import re
from litellm import completion
from termcolor import colored
from typing import List, Dict
from typing import List, Mapping

from opendevin.agent import Agent
from opendevin.state import State
from opendevin.action import (
Action,
CmdRunAction,
AgentEchoAction,
AgentFinishAction,
)
from opendevin.observation import (
CmdOutputObservation,
AgentMessageObservation,
)

from opendevin.agent import Agent, Message, Role
from opendevin.lib.event import Event
from opendevin.lib.command_manager import CommandManager
from opendevin.sandbox.sandbox import DockerInteractive

assert (
"OPENAI_API_KEY" in os.environ
@@ -53,9 +62,7 @@ def parse_response(response) -> str:
class CodeActAgent(Agent):
def __init__(
self,
instruction: str,
workspace_dir: str,
max_steps: int = 100
model_name: str
) -> None:
"""
Initializes a new instance of the CodeActAgent class.
@@ -64,69 +71,68 @@ def __init__(
- instruction (str): The instruction for the agent to execute.
- max_steps (int): The maximum number of steps to run the agent.
"""
super().__init__(instruction, workspace_dir, max_steps)
self._history = [Message(Role.SYSTEM, SYSTEM_MESSAGE)]
self._history.append(Message(Role.USER, instruction))
self.env = DockerInteractive(workspace_dir=workspace_dir)
print(colored("===USER:===\n" + instruction, "green"))

def _history_to_messages(self) -> List[Dict]:
return [message.to_dict() for message in self._history]

def run(self) -> None:
"""
Starts the execution of the assigned instruction. This method should
be implemented by subclasses to define the specific execution logic.
"""
for _ in range(self.max_steps):
response = completion(
messages=self._history_to_messages(),
model=self.model_name,
stop=["</execute>"],
temperature=0.0,
seed=42,
)
action = parse_response(response)
self._history.append(Message(Role.ASSISTANT, action))
print(colored("===ASSISTANT:===\n" + action, "yellow"))

command = re.search(r"<execute>(.*)</execute>", action, re.DOTALL)
if command is not None:
# a command was found
command_group = command.group(1)
if command_group.strip() == "exit":
print(colored("Exit received. Exiting...", "red"))
break
# execute the code
# TODO: does exit_code get loaded into Message?
exit_code, observation = self.env.execute(command_group)
self._history.append(Message(Role.ASSISTANT, observation))
print(colored("===ENV OBSERVATION:===\n" + observation, "blue"))
else:
# we could provide a error message for the model to continue similar to
# https://github.com/xingyaoww/mint-bench/blob/main/mint/envs/general_env.py#L18-L23
observation = INVALID_INPUT_MESSAGE
self._history.append(Message(Role.ASSISTANT, observation))
print(colored("===ENV OBSERVATION:===\n" + observation, "blue"))

self.env.close()

def chat(self, message: str) -> None:
"""
Optional method for interactive communication with the agent during its execution. Implementations
can use this method to modify the agent's behavior or state based on chat inputs.
super().__init__(model_name)
self.messages: List[Mapping[str, str]] = []
self.instruction: str = ""

def step(self, state: State) -> Action:
if len(self.messages) == 0:
assert self.instruction, "Expecting instruction to be set"
self.messages = [
{"role": "system", "content": SYSTEM_MESSAGE},
{"role": "user", "content": self.instruction},
]
print(colored("===USER:===\n" + self.instruction, "green"))

updated_info = state.updated_info

if updated_info:
for prev_action, obs in updated_info:
assert isinstance(prev_action, (CmdRunAction, AgentEchoAction)), "Expecting CmdRunAction or AgentEchoAction for Action"

if isinstance(obs, AgentMessageObservation): # warning message from itself
self.messages.append({"role": "user", "content": obs.content})
print(colored("===USER:===\n" + obs.content, "green"))
elif isinstance(obs, CmdOutputObservation):
content = "OBSERVATION:\n" + obs.content
content += f"\n[Command {obs.command_id} finished with exit code {obs.exit_code}]"
self.messages.append({"role": "user", "content": content})
print(colored("===ENV OBSERVATION:===\n" + content, "blue"))
else:
raise NotImplementedError(f"Unknown observation type: {obs.__class__}")

response = completion(
messages=self.messages,
model=self.model_name,
stop=["</execute>"],
temperature=0.0,
seed=42,
)
action_str: str = parse_response(response)
self.messages.append({"role": "assistant", "content": action_str})
print(colored("===ASSISTANT:===\n" + action_str, "yellow"))

command = re.search(r"<execute>(.*)</execute>", action_str, re.DOTALL)
if command is not None:
# a command was found
command_group = command.group(1)
if command_group.strip() == "exit":
print(colored("Exit received. Exiting...", "red"))
return AgentFinishAction()
return CmdRunAction(command=command_group)
# # execute the code
# # TODO: does exit_code get loaded into Message?
# exit_code, observation = self.env.execute(command_group)
# self._history.append(Message(Role.ASSISTANT, observation))
# print(colored("===ENV OBSERVATION:===\n" + observation, "blue"))
else:
# we could provide a error message for the model to continue similar to
# https://github.com/xingyaoww/mint-bench/blob/main/mint/envs/general_env.py#L18-L23
# observation = INVALID_INPUT_MESSAGE
# self._history.append(Message(Role.ASSISTANT, observation))
# print(colored("===ENV OBSERVATION:===\n" + observation, "blue"))
return AgentEchoAction(content=INVALID_INPUT_MESSAGE) # warning message to itself

Parameters:
- message (str): The chat message or command.
"""
raise NotImplementedError

# TODO: implement these abstract methods
def add_event(self, event: Event) -> None:
raise NotImplementedError("Implement this abstract method")

def step(self, cmd_mgr: CommandManager) -> Event:
raise NotImplementedError("Implement this abstract method")

def search_memory(self, query: str) -> List[str]:
raise NotImplementedError("Implement this abstract method")
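With this change CodeActAgent no longer drives its own run() loop: step() consumes a State and returns a single Action, and the controller executes it. The following hypothetical loop shows how those pieces are meant to fit together; the run_action() call and the way updated_info is refilled are assumptions, not code from this commit:

# Hypothetical controller loop around the new step()/Action contract (illustrative only).
agent = CodeActAgent(model_name="gpt-4")
agent.instruction = "Write a hello-world script"   # step() asserts this is set
state = State(updated_info=[])

for _ in range(100):                         # max_steps, chosen arbitrarily here
    action = agent.step(state)               # agent decides the next Action
    if isinstance(action, AgentFinishAction):
        break
    obs = command_manager.run_action(action)  # assumed controller-side executor
    state.updated_info = [(action, obs)]      # feed the result back on the next step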
150 changes: 123 additions & 27 deletions agenthub/langchains_agent/__init__.py
@@ -1,8 +1,28 @@
from typing import List, Any
from typing import List, Dict, Type

import agenthub.langchains_agent.utils.llm as llm
from opendevin.agent import Agent
from agenthub.langchains_agent.utils.agent import Agent as LangchainsAgentImpl
from opendevin.lib.event import Event
from opendevin.action import (
Action,
CmdRunAction,
CmdKillAction,
BrowseURLAction,
FileReadAction,
FileWriteAction,
AgentRecallAction,
AgentThinkAction,
AgentFinishAction,
)
from opendevin.observation import (
Observation,
CmdOutputObservation,
BrowserOutputObservation,
)
from opendevin.state import State

from agenthub.langchains_agent.utils.monologue import Monologue
from agenthub.langchains_agent.utils.memory import LongTermMemory


INITIAL_THOUGHTS = [
"I exist!",
Expand Down Expand Up @@ -43,59 +63,135 @@
]


MAX_OUTPUT_LENGTH = 5000
MAX_MONOLOGUE_LENGTH = 20000


ACTION_TYPE_TO_CLASS: Dict[str, Type[Action]] = {
"run": CmdRunAction,
"kill": CmdKillAction,
"browse": BrowseURLAction,
"read": FileReadAction,
"write": FileWriteAction,
"recall": AgentRecallAction,
"think": AgentThinkAction,
"finish": AgentFinishAction,
}

CLASS_TO_ACTION_TYPE: Dict[Type[Action], str] = {v: k for k, v in ACTION_TYPE_TO_CLASS.items()}

class LangchainsAgent(Agent):
_initialized = False
agent: Any = None

def __init__(self, model_name: str):
super().__init__(model_name)
self.monologue = Monologue(self.model_name)
self.memory = LongTermMemory()

def _add_event(self, event: dict):
if 'output' in event['args'] and len(event['args']['output']) > MAX_OUTPUT_LENGTH:
event['args']['output'] = event['args']['output'][:MAX_OUTPUT_LENGTH] + "..."

self.monologue.add_event(event)
self.memory.add_event(event)
if self.monologue.get_total_length() > MAX_MONOLOGUE_LENGTH:
self.monologue.condense()

def _initialize(self):
if self._initialized:
return

if self.instruction is None or self.instruction == "":
raise ValueError("Instruction must be provided")
self.agent = LangchainsAgentImpl(self.instruction, self.model_name)

next_is_output = False
for thought in INITIAL_THOUGHTS:
thought = thought.replace("$TASK", self.instruction)
if next_is_output:
event = Event("output", {"output": thought})
d = {"action": "output", "args": {"output": thought}}
next_is_output = False
else:
if thought.startswith("RUN"):
command = thought.split("RUN ")[1]
event = Event("run", {"command": command})
d = {"action": "run", "args": {"command": command}}
next_is_output = True

elif thought.startswith("RECALL"):
query = thought.split("RECALL ")[1]
event = Event("recall", {"query": query})
d = {"action": "recall", "args": {"query": query}}
next_is_output = True

elif thought.startswith("BROWSE"):
url = thought.split("BROWSE ")[1]
event = Event("browse", {"url": url})
d = {"action": "browse", "args": {"url": url}}
next_is_output = True
else:
event = Event("think", {"thought": thought})
self.agent.add_event(event)
self._initialized = True
d = {"action": "think", "args": {"thought": thought}}

def add_event(self, event: Event) -> None:
if self.agent:
self.agent.add_event(event)
self._add_event(d)
self._initialized = True

def step(self, cmd_mgr) -> Event:
def step(self, state: State) -> Action:
self._initialize()
return self.agent.get_next_action(cmd_mgr)
# TODO: make langchains agent use Action & Observation
# completely from the ground up

def search_memory(self, query: str) -> List[str]:
return self.agent.memory.search(query)
# Translate state to action_dict
for prev_action, obs in state.updated_info:
if isinstance(obs, CmdOutputObservation):
if obs.error:
d = {"action": "error", "args": {"output": obs.content}}
else:
d = {"action": "output", "args": {"output": obs.content}}
# elif isinstance(obs, UserMessageObservation):
# d = {"action": "output", "args": {"output": obs.message}}
# elif isinstance(obs, AgentMessageObservation):
# d = {"action": "output", "args": {"output": obs.message}}
elif isinstance(obs, (BrowserOutputObservation, Observation)):
d = {"action": "output", "args": {"output": obs.content}}
else:
raise NotImplementedError(f"Unknown observation type: {obs}")
self._add_event(d)

def chat(self, message: str) -> None:
"""
Optional method for interactive communication with the agent during its execution. Implementations
can use this method to modify the agent's behavior or state based on chat inputs.

Parameters:
- message (str): The chat message or command.
"""
raise NotImplementedError
if isinstance(prev_action, CmdRunAction):
d = {"action": "run", "args": {"command": prev_action.command}}
elif isinstance(prev_action, CmdKillAction):
d = {"action": "kill", "args": {"id": prev_action.id}}
elif isinstance(prev_action, BrowseURLAction):
d = {"action": "browse", "args": {"url": prev_action.url}}
elif isinstance(prev_action, FileReadAction):
d = {"action": "read", "args": {"file": prev_action.path}}
elif isinstance(prev_action, FileWriteAction):
d = {"action": "write", "args": {"file": prev_action.path, "content": prev_action.contents}}
elif isinstance(prev_action, AgentRecallAction):
d = {"action": "recall", "args": {"query": prev_action.query}}
elif isinstance(prev_action, AgentThinkAction):
d = {"action": "think", "args": {"thought": prev_action.thought}}
elif isinstance(prev_action, AgentFinishAction):
d = {"action": "finish"}
else:
raise NotImplementedError(f"Unknown action type: {prev_action}")
self._add_event(d)

state.updated_info = []

action_dict = llm.request_action(
self.instruction,
self.monologue.get_thoughts(),
self.model_name,
state.background_commands_obs,
)
if action_dict is None:
action_dict = {"action": "think", "args": {"thought": "..."}}

# Translate action_dict to Action
action = ACTION_TYPE_TO_CLASS[action_dict["action"]](**action_dict["args"])
self.latest_action = action
return action

def search_memory(self, query: str) -> List[str]:
return self.memory.search(query)


Agent.register("LangchainsAgent", LangchainsAgent)
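
Internally LangchainsAgent still reasons over the legacy action dicts, so ACTION_TYPE_TO_CLASS and CLASS_TO_ACTION_TYPE translate at the boundary between dicts and the new Action dataclasses. A small round-trip example, for illustration only:

# Illustrative round trip between the legacy dict format and the new Action dataclasses.
action_dict = {"action": "run", "args": {"command": "ls -la"}}

# dict -> Action, as done at the end of LangchainsAgent.step()
action = ACTION_TYPE_TO_CLASS[action_dict["action"]](**action_dict["args"])
assert isinstance(action, CmdRunAction) and action.command == "ls -la"

# Action -> dict (the reverse mapping, useful e.g. when logging to the monologue)
round_tripped = {
    "action": CLASS_TO_ACTION_TYPE[type(action)],
    "args": {"command": action.command},
}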
37 changes: 0 additions & 37 deletions agenthub/langchains_agent/utils/agent.py

This file was deleted.
