Implement deserialization for actions and observations (All-Hands-AI#359)

* action deserializing
* add observation deserialization
* add tests
* refactor agents with serialization
* fix some errors
* fix lint
* fix json parser
preritt · Mar 30, 2024 · effac86 · effac86
1 parent f68ee45
commit effac86
Show file tree

Hide file tree

Showing 19 changed files with 318 additions and 328 deletions.
diff --git a/agenthub/langchains_agent/langchains_agent.py b/agenthub/langchains_agent/langchains_agent.py
@@ -7,22 +7,6 @@
 from agenthub.langchains_agent.utils.monologue import Monologue
 from agenthub.langchains_agent.utils.memory import LongTermMemory
 
-from opendevin.action import (
-    NullAction,
-    CmdRunAction,
-    CmdKillAction,
-    BrowseURLAction,
-    FileReadAction,
-    FileWriteAction,
-    AgentRecallAction,
-    AgentThinkAction,
-    AgentFinishAction,
-)
-from opendevin.observation import (
-    CmdOutputObservation,
-)
-
-
 MAX_MONOLOGUE_LENGTH = 20000
 MAX_OUTPUT_LENGTH = 5000
 
@@ -81,7 +65,7 @@ def __init__(self, llm: LLM):
         self.memory = LongTermMemory()
 
     def _add_event(self, event: dict):
-        if 'output' in event['args'] and len(event['args']['output']) > MAX_OUTPUT_LENGTH:
+        if 'args' in event and 'output' in event['args'] and len(event['args']['output']) > MAX_OUTPUT_LENGTH:
             event['args']['output'] = event['args']['output'][:MAX_OUTPUT_LENGTH] + "..."
 
         self.monologue.add_event(event)
@@ -136,45 +120,9 @@ def _initialize(self, task):
 
     def step(self, state: State) -> Action:
         self._initialize(state.plan.main_goal)
-        # TODO: make langchains agent use Action & Observation
-        # completly from ground up
-
-        # Translate state to action_dict
         for prev_action, obs in state.updated_info:
-            d = None
-            if isinstance(obs, CmdOutputObservation):
-                if obs.error:
-                    d = {"action": "error", "args": {"output": obs.content}}
-                else:
-                    d = {"action": "output", "args": {"output": obs.content}}
-            else:
-                d = {"action": "output", "args": {"output": obs.content}}
-            if d is not None:
-                self._add_event(d)
-
-            d = None
-            if isinstance(prev_action, CmdRunAction):
-                d = {"action": "run", "args": {"command": prev_action.command}}
-            elif isinstance(prev_action, CmdKillAction):
-                d = {"action": "kill", "args": {"id": prev_action.id}}
-            elif isinstance(prev_action, BrowseURLAction):
-                d = {"action": "browse", "args": {"url": prev_action.url}}
-            elif isinstance(prev_action, FileReadAction):
-                d = {"action": "read", "args": {"file": prev_action.path}}
-            elif isinstance(prev_action, FileWriteAction):
-                d = {"action": "write", "args": {"file": prev_action.path, "content": prev_action.contents}}
-            elif isinstance(prev_action, AgentRecallAction):
-                d = {"action": "recall", "args": {"query": prev_action.query}}
-            elif isinstance(prev_action, AgentThinkAction):
-                d = {"action": "think", "args": {"thought": prev_action.thought}}
-            elif isinstance(prev_action, AgentFinishAction):
-                d = {"action": "finish"}
-            elif isinstance(prev_action, NullAction):
-                d = None
-            else:
-                raise ValueError(f"Unknown action type: {prev_action}")
-            if d is not None:
-                self._add_event(d)
+            self._add_event(prev_action.to_dict())
+            self._add_event(obs.to_dict())
 
         state.updated_info = []
 

diff --git a/agenthub/langchains_agent/utils/json.py b/agenthub/langchains_agent/utils/json.py
@@ -6,3 +6,7 @@ def my_encoder(obj):
 
 def dumps(obj, **kwargs):
     return json.dumps(obj, default=my_encoder, **kwargs)
+
+def loads(s, **kwargs):
+    return json.loads(s, **kwargs)
+
diff --git a/agenthub/langchains_agent/utils/memory.py b/agenthub/langchains_agent/utils/memory.py
@@ -48,11 +48,20 @@ def __init__(self):
         self.thought_idx = 0
 
     def add_event(self, event):
+        id = ""
+        t = ""
+        if "action" in event:
+            t = "action"
+            id = event["action"]
+        elif "observation" in event:
+            t = "observation"
+            id = event["observation"]
         doc = Document(
             text=json.dumps(event),
             doc_id=str(self.thought_idx),
             extra_info={
-                "type": event["action"],
+                "type": t,
+                "id": id,
                 "idx": self.thought_idx,
             },
         )

diff --git a/agenthub/langchains_agent/utils/prompts.py b/agenthub/langchains_agent/utils/prompts.py
@@ -1,8 +1,6 @@
-from typing import List, Dict, Type
+from typing import List
 
-from langchain_core.pydantic_v1 import BaseModel
 from langchain.prompts import PromptTemplate
-from langchain_core.output_parsers import JsonOutputParser
 
 from opendevin import config
 
@@ -13,35 +11,13 @@
 from . import json
 
 from opendevin.action import (
+    action_from_dict,
     Action,
-    CmdRunAction,
-    CmdKillAction,
-    BrowseURLAction,
-    FileReadAction,
-    FileWriteAction,
-    AgentRecallAction,
-    AgentThinkAction,
-    AgentFinishAction,
-    AgentSummarizeAction,
 )
 from opendevin.observation import (
     CmdOutputObservation,
 )
 
-
-ACTION_TYPE_TO_CLASS: Dict[str, Type[Action]] = {
-    "run": CmdRunAction,
-    "kill": CmdKillAction,
-    "browse": BrowseURLAction,
-    "read": FileReadAction,
-    "write": FileWriteAction,
-    "recall": AgentRecallAction,
-    "think": AgentThinkAction,
-    "summarize": AgentSummarizeAction,
-    "finish": AgentFinishAction,
-}
-CLASS_TO_ACTION_TYPE: Dict[Type[Action], str] = {v: k for k, v in ACTION_TYPE_TO_CLASS.items()}
-
 ACTION_PROMPT = """
 You're a thoughtful robot. Your main task is to {task}.
 Don't expand the scope of your task--just complete it as written.
@@ -116,15 +92,6 @@
 """
 
 
-class _ActionDict(BaseModel):
-    action: str
-    args: dict
-
-
-class NewMonologue(BaseModel):
-    new_monologue: List[_ActionDict]
-
-
 def get_summarize_monologue_prompt(thoughts):
     prompt = PromptTemplate.from_template(MONOLOGUE_SUMMARY_PROMPT)
     return prompt.format(monologue=json.dumps({'old_monologue': thoughts}, indent=2))
@@ -137,13 +104,14 @@ def get_request_action_prompt(
     hint = ''
     if len(thoughts) > 0:
         latest_thought = thoughts[-1]
-        if latest_thought["action"] == 'think':
-            if latest_thought["args"]['thought'].startswith("OK so my task is"):
-                hint = "You're just getting started! What should you do first?"
-            else:
-                hint = "You've been thinking a lot lately. Maybe it's time to take action?"
-        elif latest_thought["action"] == 'error':
-            hint = "Looks like that last command failed. Maybe you need to fix it, or try something else."
+        if "action" in latest_thought:
+            if latest_thought["action"] == 'think':
+                if latest_thought["args"]['thought'].startswith("OK so my task is"):
+                    hint = "You're just getting started! What should you do first?"
+                else:
+                    hint = "You've been thinking a lot lately. Maybe it's time to take action?"
+            elif latest_thought["action"] == 'error':
+                hint = "Looks like that last command failed. Maybe you need to fix it, or try something else."
 
     bg_commands_message = ""
     if len(background_commands_obs) > 0:
@@ -162,17 +130,15 @@ def get_request_action_prompt(
     )
 
 def parse_action_response(response: str) -> Action:
-    parser = JsonOutputParser(pydantic_object=_ActionDict)
-    action_dict = parser.parse(response)
+    json_start = response.find("{")
+    json_end = response.rfind("}") + 1
+    response = response[json_start:json_end]
+    action_dict = json.loads(response)
     if 'content' in action_dict:
         # The LLM gets confused here. Might as well be robust
         action_dict['contents'] = action_dict.pop('content')
+    return action_from_dict(action_dict)
 
-    action = ACTION_TYPE_TO_CLASS[action_dict["action"]](**action_dict["args"])
-    return action
-
-def parse_summary_response(response: str) -> List[Action]:
-    parser = JsonOutputParser(pydantic_object=NewMonologue)
-    parsed = parser.parse(response)
-    #thoughts = [ACTION_TYPE_TO_CLASS[t['action']](**t['args']) for t in parsed['new_monologue']]
+def parse_summary_response(response: str) -> List[dict]:
+    parsed = json.loads(response)
     return parsed['new_monologue']
diff --git a/agenthub/planner_agent/prompt.py b/agenthub/planner_agent/prompt.py
@@ -3,7 +3,7 @@
 
 from opendevin.controller.agent_controller import print_with_indent
 from opendevin.plan import Plan
-from opendevin.action import Action
+from opendevin.action import Action, action_from_dict
 from opendevin.observation import Observation
 
 from opendevin.action import (
@@ -136,15 +136,10 @@ def get_prompt(plan: Plan, history: List[Tuple[Action, Observation]]):
     latest_action: Action = NullAction()
     for action, observation in sub_history:
         if not isinstance(action, NullAction):
-            #if not isinstance(action, ModifyTaskAction) and not isinstance(action, AddTaskAction):
-            action_dict = action.to_dict()
-            action_dict["action"] = convert_action(action_dict["action"])
-            history_dicts.append(action_dict)
+            history_dicts.append(action.to_dict())
             latest_action = action
         if not isinstance(observation, NullObservation):
-            observation_dict = observation.to_dict()
-            observation_dict["observation"] = convert_observation(observation_dict["observation"])
-            history_dicts.append(observation_dict)
+            history_dicts.append(observation.to_dict())
     history_str = json.dumps(history_dicts, indent=2)
 
     hint = ""
@@ -157,7 +152,7 @@ def get_prompt(plan: Plan, history: List[Tuple[Action, Observation]]):
         plan_status = "You're not currently working on any tasks. Your next action MUST be to mark a task as in_progress."
         hint = plan_status
 
-    latest_action_id = convert_action(latest_action.to_dict()["action"])
+    latest_action_id = latest_action.to_dict()['action']
 
     if current_task is not None:
         if latest_action_id == "":
@@ -200,43 +195,6 @@ def parse_response(response: str) -> Action:
     if 'content' in action_dict:
         # The LLM gets confused here. Might as well be robust
         action_dict['contents'] = action_dict.pop('content')
-
-    args_dict = action_dict.get("args", {})
-    action = ACTION_TYPE_TO_CLASS[action_dict["action"]](**args_dict)
-    return action
-
-def convert_action(action):
-    if action == "CmdRunAction":
-        action = "run"
-    elif action == "CmdKillAction":
-        action = "kill"
-    elif action == "BrowseURLAction":
-        action = "browse"
-    elif action == "FileReadAction":
-        action = "read"
-    elif action == "FileWriteAction":
-        action = "write"
-    elif action == "AgentFinishAction":
-        action = "finish"
-    elif action == "AgentRecallAction":
-        action = "recall"
-    elif action == "AgentThinkAction":
-        action = "think"
-    elif action == "AgentSummarizeAction":
-        action = "summarize"
-    elif action == "AddTaskAction":
-        action = "add_task"
-    elif action == "ModifyTaskAction":
-        action = "modify_task"
+    action = action_from_dict(action_dict)
     return action
 
-def convert_observation(observation):
-    if observation == "UserMessageObservation":
-        observation = "chat"
-    elif observation == "AgentMessageObservation":
-        observation = "chat"
-    elif observation == "CmdOutputObservation":
-        observation = "run"
-    elif observation == "FileReadObservation":
-        observation = "read"
-    return observation
diff --git a/opendevin/action/__init__.py b/opendevin/action/__init__.py
@@ -13,18 +13,22 @@
     FileWriteAction,
     AgentRecallAction,
     AgentThinkAction,
-    AgentFinishAction
+    AgentFinishAction,
+    AddTaskAction,
+    ModifyTaskAction,
 )
 
 ACTION_TYPE_TO_CLASS = {action_class.action:action_class for action_class in actions} # type: ignore[attr-defined]
 
-def action_class_initialize_dispatcher(action: str, *args: str, **kwargs: str) -> Action:
-    action_class = ACTION_TYPE_TO_CLASS.get(action)
+def action_from_dict(action: dict) -> Action:
+    action = action.copy()
+    if "action" not in action:
+        raise KeyError(f"'action' key is not found in {action=}")
+    action_class = ACTION_TYPE_TO_CLASS.get(action["action"])
     if action_class is None:
-        raise KeyError(f"'{action=}' is not defined. Available actions: {ACTION_TYPE_TO_CLASS.keys()}")
-    return action_class(*args, **kwargs)
-
-CLASS_TO_ACTION_TYPE = {v: k for k, v in ACTION_TYPE_TO_CLASS.items()}
+        raise KeyError(f"'{action['action']=}' is not defined. Available actions: {ACTION_TYPE_TO_CLASS.keys()}")
+    args = action.get("args", {})
+    return action_class(**args)
 
 __all__ = [
     "Action",

diff --git a/opendevin/action/agent.py b/opendevin/action/agent.py
@@ -25,7 +25,6 @@ def message(self) -> str:
 @dataclass
 class AgentThinkAction(NotExecutableAction):
     thought: str
-    runnable: bool = False
     action: str = "think"
 
     def run(self, controller: "AgentController") -> "Observation":
@@ -38,7 +37,6 @@ def message(self) -> str:
 @dataclass
 class AgentEchoAction(ExecutableAction):
     content: str
-    runnable: bool = True
     action: str = "echo"
 
     def run(self, controller: "AgentController") -> "Observation":
@@ -60,7 +58,6 @@ def message(self) -> str:
 
 @dataclass
 class AgentFinishAction(NotExecutableAction):
-    runnable: bool = False
     action: str = "finish"
 
     def run(self, controller: "AgentController") -> "Observation":

diff --git a/opendevin/action/base.py b/opendevin/action/base.py
@@ -26,8 +26,6 @@ def executable(self) -> bool:
     def message(self) -> str:
         raise NotImplementedError
 
-
-
 @dataclass
 class ExecutableAction(Action):
     @property