Add logging (All-Hands-AI#660)
* Add logging config for the app and for llm debug

* - switch to python, add special llm logger
  - add logging to sandbox.py
  - add session.py
  - add a directory per session
  - small additions for AgentController

* - add sys log, but try to exclude litellm; log llm responses as json

* Update opendevin/_logging.py

Co-authored-by: Anas DORBANI <95044293+dorbanianas@users.noreply.github.com>

* - use standard file naming
  - quick pass through a few more files

* fix ruff

* clean up

* mypy types

* make mypy happy

---------

Co-authored-by: Anas DORBANI <95044293+dorbanianas@users.noreply.github.com>
enyst and dorbanianas committed Apr 7, 2024
1 parent d87a7dd commit 4b4ce20
Showing 11 changed files with 347 additions and 189 deletions.
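
The diffs below import opendevin_logger, llm_prompt_logger, and llm_response_logger from a new opendevin/logging.py module, which is among the 11 changed files but is not expanded on this page. Based only on those imports and the commit message (a console logger for the app, plus file loggers in a directory per session), here is a minimal sketch of what such a module might provide; the logger names, file names, and directory layout are assumptions, not the actual implementation:

```python
# Hypothetical sketch of the new opendevin/logging.py; the actual file is not shown in this diff.
# Assumption: a console logger for the app, plus separate DEBUG file loggers for LLM
# prompts and responses that write into one directory per session, as the commit message describes.
import logging
import os
from datetime import datetime

LOG_DIR = os.path.join(os.getcwd(), 'logs')
SESSION_ID = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')  # one directory per session

# Application-wide logger, imported elsewhere as `opendevin_logger as logger`.
opendevin_logger = logging.getLogger('opendevin')
opendevin_logger.setLevel(logging.INFO)
opendevin_logger.addHandler(logging.StreamHandler())


def _file_logger(name: str, filename: str) -> logging.Logger:
    """Create a DEBUG logger that writes to logs/<session>/<filename>."""
    session_dir = os.path.join(LOG_DIR, SESSION_ID)
    os.makedirs(session_dir, exist_ok=True)
    file_logger = logging.getLogger(name)
    file_logger.setLevel(logging.DEBUG)
    file_logger.addHandler(logging.FileHandler(os.path.join(session_dir, filename)))
    file_logger.propagate = False  # keep LLM debug output out of the console log
    return file_logger


# The two loggers imported by opendevin/llm/llm.py below.
llm_prompt_logger = _file_logger('opendevin.llm.prompt', 'prompt.log')
llm_response_logger = _file_logger('opendevin.llm.response', 'response.log')
```

The real module also has to cover two points from the commit message that this sketch ignores: keeping litellm's own loggers out of the system log, and serializing LLM responses as JSON.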
9 changes: 6 additions & 3 deletions .github/workflows/lint.yml
@@ -32,9 +32,12 @@ jobs:
         uses: actions/setup-python@v5
         with:
           python-version: 3.11
+      - name: Create mypy cache directory
+        run: mkdir -p .mypy_cache
       - name: Install dependencies
-        run: pip install ruff mypy
+        run: pip install ruff mypy types-PyYAML types-toml
+      - name: Run mypy
+        run: python -m mypy --install-types --non-interactive --config-file dev_config/python/mypy.ini opendevin/ agenthub/
       - name: Run ruff
         run: ruff check --config dev_config/python/ruff.toml opendevin/ agenthub/
-      - name: Run mypy
-        run: mypy --install-types --non-interactive --config-file dev_config/python/mypy.ini opendevin/ agenthub/

1 change: 0 additions & 1 deletion opendevin/config.py
@@ -16,7 +16,6 @@
     'LLM_NUM_RETRIES': 6,
     'LLM_COOLDOWN_TIME': 1,
     'DIRECTORY_REWRITE': '',
-    'PROMPT_DEBUG_DIR': '',
     'MAX_ITERATIONS': 100,
 }

101 changes: 52 additions & 49 deletions opendevin/controller/agent_controller.py
@@ -17,50 +17,51 @@
 )
 from opendevin.observation import Observation, AgentErrorObservation, NullObservation
 from opendevin import config
+from opendevin.logging import opendevin_logger as logger
 
 from .command_manager import CommandManager
 
 
 ColorType = Literal[
-    "red",
-    "green",
-    "yellow",
-    "blue",
-    "magenta",
-    "cyan",
-    "light_grey",
-    "dark_grey",
-    "light_red",
-    "light_green",
-    "light_yellow",
-    "light_blue",
-    "light_magenta",
-    "light_cyan",
-    "white",
+    'red',
+    'green',
+    'yellow',
+    'blue',
+    'magenta',
+    'cyan',
+    'light_grey',
+    'dark_grey',
+    'light_red',
+    'light_green',
+    'light_yellow',
+    'light_blue',
+    'light_magenta',
+    'light_cyan',
+    'white',
 ]
 
 
 DISABLE_COLOR_PRINTING = (
-    config.get_or_default("DISABLE_COLOR", "false").lower() == "true"
+    config.get_or_default('DISABLE_COLOR', 'false').lower() == 'true'
 )
-MAX_ITERATIONS = config.get("MAX_ITERATIONS")
+MAX_ITERATIONS = config.get('MAX_ITERATIONS')
 
 
-def print_with_color(text: Any, print_type: str = "INFO"):
+def print_with_color(text: Any, print_type: str = 'INFO'):
     TYPE_TO_COLOR: Mapping[str, ColorType] = {
-        "BACKGROUND LOG": "blue",
-        "ACTION": "green",
-        "OBSERVATION": "yellow",
-        "INFO": "cyan",
-        "ERROR": "red",
-        "PLAN": "light_magenta",
+        'BACKGROUND LOG': 'blue',
+        'ACTION': 'green',
+        'OBSERVATION': 'yellow',
+        'INFO': 'cyan',
+        'ERROR': 'red',
+        'PLAN': 'light_magenta',
     }
-    color = TYPE_TO_COLOR.get(print_type.upper(), TYPE_TO_COLOR["INFO"])
+    color = TYPE_TO_COLOR.get(print_type.upper(), TYPE_TO_COLOR['INFO'])
     if DISABLE_COLOR_PRINTING:
         print(f"\n{print_type.upper()}:\n{str(text)}", flush=True)
     else:
         print(
-            colored(f"\n{print_type.upper()}:\n", color, attrs=["bold"])
+            colored(f"\n{print_type.upper()}:\n", color, attrs=['bold'])
             + colored(str(text), color),
             flush=True,
         )
@@ -73,7 +74,7 @@ def __init__(
         self,
         agent: Agent,
         workdir: str,
-        id: str = "",
+        id: str = '',
         max_iterations: int = MAX_ITERATIONS,
         container_image: str | None = None,
         callbacks: List[Callable] = [],
@@ -82,7 +83,8 @@ def __init__(
         self.agent = agent
         self.max_iterations = max_iterations
         self.workdir = workdir
-        self.command_manager = CommandManager(self.id, workdir, container_image)
+        self.command_manager = CommandManager(
+            self.id, workdir, container_image)
         self.callbacks = callbacks
 
     def update_state_for_step(self, i):
@@ -94,9 +96,9 @@ def update_state_after_step(self):
 
     def add_history(self, action: Action, observation: Observation):
         if not isinstance(action, Action):
-            raise ValueError("action must be an instance of Action")
+            raise ValueError('action must be an instance of Action')
         if not isinstance(observation, Observation):
-            raise ValueError("observation must be an instance of Observation")
+            raise ValueError('observation must be an instance of Observation')
         self.state.history.append((action, observation))
         self.state.updated_info.append((action, observation))
 
@@ -108,61 +110,62 @@ async def start_loop(self, task: str):
             try:
                 finished = await self.step(i)
             except Exception as e:
-                print("Error in loop", e, flush=True)
+                logger.error('Error in loop', exc_info=True)
                 raise e
             if finished:
                 break
         if not finished:
-            print("Exited before finishing", flush=True)
+            logger.info('Exited before finishing the task.')
 
     async def step(self, i: int):
-        print("\n\n==============", flush=True)
-        print("STEP", i, flush=True)
-        print_with_color(self.state.plan.main_goal, "PLAN")
+        print('\n\n==============', flush=True)
+        print('STEP', i, flush=True)
+        print_with_color(self.state.plan.main_goal, 'PLAN')
 
         log_obs = self.command_manager.get_background_obs()
         for obs in log_obs:
             self.add_history(NullAction(), obs)
             await self._run_callbacks(obs)
-            print_with_color(obs, "BACKGROUND LOG")
+            print_with_color(obs, 'BACKGROUND LOG')
 
         self.update_state_for_step(i)
         action: Action = NullAction()
-        observation: Observation = NullObservation("")
+        observation: Observation = NullObservation('')
         try:
             action = self.agent.step(self.state)
             if action is None:
-                raise ValueError("Agent must return an action")
-            print_with_color(action, "ACTION")
+                raise ValueError('Agent must return an action')
+            print_with_color(action, 'ACTION')
         except Exception as e:
             observation = AgentErrorObservation(str(e))
-            print_with_color(observation, "ERROR")
+            print_with_color(observation, 'ERROR')
             traceback.print_exc()
             # TODO Change to more robust error handling
-            if "The api_key client option must be set" in observation.content:
+            if 'The api_key client option must be set' in observation.content:
                 raise
         self.update_state_after_step()
 
         await self._run_callbacks(action)
 
         finished = isinstance(action, AgentFinishAction)
         if finished:
-            print_with_color(action, "INFO")
+            print_with_color(action, 'INFO')
             return True
 
         if isinstance(action, AddTaskAction):
             try:
-                self.state.plan.add_subtask(action.parent, action.goal, action.subtasks)
+                self.state.plan.add_subtask(
+                    action.parent, action.goal, action.subtasks)
             except Exception as e:
                 observation = AgentErrorObservation(str(e))
-                print_with_color(observation, "ERROR")
+                print_with_color(observation, 'ERROR')
                 traceback.print_exc()
         elif isinstance(action, ModifyTaskAction):
             try:
                 self.state.plan.set_subtask_state(action.id, action.state)
             except Exception as e:
                 observation = AgentErrorObservation(str(e))
-                print_with_color(observation, "ERROR")
+                print_with_color(observation, 'ERROR')
                 traceback.print_exc()
 
         if action.executable:
@@ -173,11 +176,11 @@ async def step(self, i: int):
                 observation = action.run(self)
             except Exception as e:
                 observation = AgentErrorObservation(str(e))
-                print_with_color(observation, "ERROR")
+                print_with_color(observation, 'ERROR')
                 traceback.print_exc()
 
         if not isinstance(observation, NullObservation):
-            print_with_color(observation, "OBSERVATION")
+            print_with_color(observation, 'OBSERVATION')
 
         self.add_history(action, observation)
         await self._run_callbacks(observation)
@@ -189,8 +192,8 @@ async def _run_callbacks(self, event):
             idx = self.callbacks.index(callback)
             try:
                 callback(event)
-            except Exception as e:
-                print("Callback error:" + str(idx), e, flush=True)
+            except Exception:
+                logger.exception('Callback error: %s', idx)
                 pass
         await asyncio.sleep(
             0.001
92 changes: 38 additions & 54 deletions opendevin/llm/llm.py
@@ -1,70 +1,65 @@
-import os
-import uuid
+from datetime import datetime
 
 from litellm.router import Router
 from functools import partial
 
 from opendevin import config
+from opendevin.logging import llm_prompt_logger, llm_response_logger
 
-DEFAULT_API_KEY = config.get("LLM_API_KEY")
-DEFAULT_BASE_URL = config.get("LLM_BASE_URL")
-DEFAULT_MODEL_NAME = config.get("LLM_MODEL")
-DEFAULT_LLM_NUM_RETRIES = config.get("LLM_NUM_RETRIES")
-DEFAULT_LLM_COOLDOWN_TIME = config.get("LLM_COOLDOWN_TIME")
-PROMPT_DEBUG_DIR = config.get("PROMPT_DEBUG_DIR")
+DEFAULT_API_KEY = config.get('LLM_API_KEY')
+DEFAULT_BASE_URL = config.get('LLM_BASE_URL')
+DEFAULT_MODEL_NAME = config.get('LLM_MODEL')
+DEFAULT_LLM_NUM_RETRIES = config.get('LLM_NUM_RETRIES')
+DEFAULT_LLM_COOLDOWN_TIME = config.get('LLM_COOLDOWN_TIME')
+
 
 class LLM:
     def __init__(self,
-        model=DEFAULT_MODEL_NAME,
-        api_key=DEFAULT_API_KEY,
-        base_url=DEFAULT_BASE_URL,
-        num_retries=DEFAULT_LLM_NUM_RETRIES,
-        cooldown_time=DEFAULT_LLM_COOLDOWN_TIME,
-        debug_dir=PROMPT_DEBUG_DIR
-    ):
+                 model=DEFAULT_MODEL_NAME,
+                 api_key=DEFAULT_API_KEY,
+                 base_url=DEFAULT_BASE_URL,
+                 num_retries=DEFAULT_LLM_NUM_RETRIES,
+                 cooldown_time=DEFAULT_LLM_COOLDOWN_TIME,
+                 ):
         self.model_name = model if model else DEFAULT_MODEL_NAME
         self.api_key = api_key if api_key else DEFAULT_API_KEY
         self.base_url = base_url if base_url else DEFAULT_BASE_URL
         self.num_retries = num_retries if num_retries else DEFAULT_LLM_NUM_RETRIES
         self.cooldown_time = cooldown_time if cooldown_time else DEFAULT_LLM_COOLDOWN_TIME
-        self._debug_dir = debug_dir if debug_dir else PROMPT_DEBUG_DIR
-        self._debug_idx = 0
-        self._debug_id = uuid.uuid4().hex
+        self._debug_id = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
 
-        # We use litellm's Router in order to support retries (especially rate limit backoff retries). 
+        # We use litellm's Router in order to support retries (especially rate limit backoff retries).
         # Typically you would use a whole model list, but it's unnecessary with our implementation's structure
         self._router = Router(
             model_list=[{
-                "model_name": self.model_name,
-                "litellm_params": {
-                    "model": self.model_name,
-                    "api_key": self.api_key,
-                    "api_base": self.base_url
+                'model_name': self.model_name,
+                'litellm_params': {
+                    'model': self.model_name,
+                    'api_key': self.api_key,
+                    'api_base': self.base_url
                 }
             }],
            num_retries=self.num_retries,
-            allowed_fails=self.num_retries, # We allow all retries to fail, so they can retry instead of going into "cooldown"
+            # We allow all retries to fail, so they can retry instead of going into "cooldown"
+            allowed_fails=self.num_retries,
            cooldown_time=self.cooldown_time
         )
-        self._completion = partial(self._router.completion, model=self.model_name)
+        self._completion = partial(
+            self._router.completion, model=self.model_name)
 
-        if self._debug_dir:
-            print(f"Logging prompts to {self._debug_dir}/{self._debug_id}")
-            completion_unwrapped = self._completion
-            def wrapper(*args, **kwargs):
-                dir = self._debug_dir + "/" + self._debug_id + "/" + str(self._debug_idx)
-                os.makedirs(dir, exist_ok=True)
-                if "messages" in kwargs:
-                    messages = kwargs["messages"]
-                else:
-                    messages = args[1]
-                self.write_debug_prompt(dir, messages)
-                resp = completion_unwrapped(*args, **kwargs)
-                message_back = resp['choices'][0]['message']['content']
-                self.write_debug_response(dir, message_back)
-                self._debug_idx += 1
-                return resp
-            self._completion = wrapper # type: ignore
+        completion_unwrapped = self._completion
+
+        def wrapper(*args, **kwargs):
+            if 'messages' in kwargs:
+                messages = kwargs['messages']
+            else:
+                messages = args[1]
+            llm_prompt_logger.debug(messages)
+            resp = completion_unwrapped(*args, **kwargs)
+            message_back = resp['choices'][0]['message']['content']
+            llm_response_logger.debug(message_back)
+            return resp
+        self._completion = wrapper  # type: ignore
 
     @property
     def completion(self):
@@ -73,14 +68,3 @@ def completion(self):
         """
         return self._completion
 
-    def write_debug_prompt(self, dir, messages):
-        prompt_out = ""
-        for message in messages:
-            prompt_out += "<" + message["role"] + ">\n"
-            prompt_out += message["content"] + "\n\n"
-        with open(f"{dir}/prompt.md", "w") as f:
-            f.write(prompt_out)
-
-    def write_debug_response(self, dir, response):
-        with open(f"{dir}/response.md", "w") as f:
-            f.write(response)
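
With the file-based write_debug_prompt/write_debug_response helpers removed, callers of LLM.completion are unchanged; logging of the prompt and of the returned message now happens as a side effect inside the wrapper. A small usage sketch (the model name and message content are placeholders, and it assumes the configuration supplies a working API key and base URL):

```python
# Hypothetical usage of the refactored wrapper; the model name and message are placeholders,
# and config must supply a valid API key / base URL for the call to succeed.
from opendevin.llm.llm import LLM

llm = LLM(model='gpt-4-turbo')  # unset arguments fall back to the config defaults
resp = llm.completion(messages=[{'role': 'user', 'content': 'Say hello.'}])

# By the time completion() returns, the wrapper has already sent the prompt to
# llm_prompt_logger and the reply text to llm_response_logger.
print(resp['choices'][0]['message']['content'])
```

Compared to the removed per-instance debug directory, routing through the logging module means the debug output location is configured in one place rather than on each LLM instance.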
