feat: specialize CodeAct into micro agents by providing markdown files (#3511)

* update microagent name and update template.toml
* substitute actual micro_agent_name for prompt manager
* add python-frontmatter
* support micro agent in codeact
* add test cases
* add instruction from require env var
* add draft gh micro agent
* update poetry lock
* update poetry lock
All-Hands-AI · Aug 28, 2024 · d9a8b53 · d9a8b53
1 parent 653bc4e
commit d9a8b53
Show file tree

Hide file tree

Showing 11 changed files with 273 additions and 21 deletions.
diff --git a/agenthub/codeact_agent/codeact_agent.py b/agenthub/codeact_agent/codeact_agent.py
@@ -27,6 +27,7 @@
     JupyterRequirement,
     PluginRequirement,
 )
+from openhands.utils.microagent import MicroAgent
 from openhands.utils.prompt import PromptManager
 
 
@@ -73,10 +74,21 @@ def __init__(
         """
         super().__init__(llm, config)
         self.reset()
+
+        self.micro_agent = (
+            MicroAgent(
+                os.path.join(
+                    os.path.dirname(__file__), 'micro', f'{config.micro_agent_name}.md'
+                )
+            )
+            if config.micro_agent_name
+            else None
+        )
+
         self.prompt_manager = PromptManager(
             prompt_dir=os.path.join(os.path.dirname(__file__)),
             agent_skills_docs=AgentSkillsRequirement.documentation,
-            micro_agent_name=None,  # TODO: implement micro-agent
+            micro_agent=self.micro_agent,
         )
 
     def action_to_str(self, action: Action) -> str:

diff --git a/agenthub/codeact_agent/micro/github.md b/agenthub/codeact_agent/micro/github.md
@@ -0,0 +1,59 @@
+---
+name: github
+agent: CodeActAgent
+require_env_var:
+    SANDBOX_ENV_GITHUB_TOKEN: "Create a GitHub Personal Access Token (https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens) and set it as SANDBOX_ENV_GITHUB_TOKEN in your environment variables."
+---
+
+# How to Interact with GitHub
+
+## Environment Variable Available
+
+1. `GITHUB_TOKEN`: A read-only token for GitHub.
+
+## Using GitHub's RESTful API
+
+Use `curl` with the `GITHUB_TOKEN` to interact with GitHub's API. Here are some common operations:
+
+1. View an issue:
+   ```
+   curl -H "Authorization: token $GITHUB_TOKEN" \
+        https://api.github.com/repos/{owner}/{repo}/issues/{issue_number}
+   ```
+
+2. List repository issues:
+   ```
+   curl -H "Authorization: token $GITHUB_TOKEN" \
+        https://api.github.com/repos/{owner}/{repo}/issues
+   ```
+
+3. Get repository details:
+   ```
+   curl -H "Authorization: token $GITHUB_TOKEN" \
+        https://api.github.com/repos/{owner}/{repo}
+   ```
+
+4. List pull requests:
+   ```
+   curl -H "Authorization: token $GITHUB_TOKEN" \
+        https://api.github.com/repos/{owner}/{repo}/pulls
+   ```
+
+5. Get user information:
+   ```
+   curl -H "Authorization: token $GITHUB_TOKEN" \
+        https://api.github.com/user
+   ```
+
+Replace `{owner}`, `{repo}`, and `{issue_number}` with appropriate values.
+
+## Important Notes
+
+1. Always use the GitHub API for operations instead of a web browser.
+2. The `GITHUB_TOKEN` is read-only. Avoid operations that require write access.
+3. Git config (username and email) is pre-set. Do not modify.
+4. Edit and test code locally. Never push directly to remote.
+5. Verify correct branch before committing.
+6. Commit changes frequently.
+7. If the issue or task is ambiguous or lacks sufficient detail, always request clarification from the user before proceeding.
+8. You should avoid using command line tools like `sed` for file editing.
diff --git a/config.template.toml b/config.template.toml
@@ -64,6 +64,15 @@ workspace_base = "./workspace"
 # Name of the default agent
 #default_agent = "CodeActAgent"
 
+# JWT secret for authentication
+#jwt_secret = ""
+
+# Restrict file types for file uploads
+#file_uploads_restrict_file_types = false
+
+# List of allowed file extensions for uploads
+#file_uploads_allowed_extensions = [".*"]
+
 #################################### LLM #####################################
 # Configuration for LLM models (group name starts with 'llm')
 # use 'llm' for the default LLM config
@@ -126,6 +135,15 @@ model = "gpt-4o"
 # Retry minimum wait time
 #retry_min_wait = 3
 
+# Retry multiplier for exponential backoff
+#retry_multiplier = 2.0
+
+# Drop any unmapped (unsupported) params without causing an exception
+#drop_params = false
+
+# Base URL for the OLLAMA API
+#ollama_base_url = ""
+
 # Temperature for the API
 #temperature = 0.0
 
@@ -149,6 +167,9 @@ model = "gpt-3.5"
 # agent.CodeActAgent
 ##############################################################################
 [agent]
+# Name of the micro agent to use for this agent
+#micro_agent_name = ""
+
 # Memory enabled
 #memory_enabled = false
 
@@ -182,6 +203,18 @@ llm_config = 'gpt3'
 # Enable auto linting after editing
 #enable_auto_lint = false
 
+# Whether to initialize plugins
+#initialize_plugins = true
+
+# Extra dependencies to install in the runtime image
+#runtime_extra_deps = ""
+
+# Environment variables to set at the launch of the runtime
+#runtime_startup_env_vars = {}
+
+# BrowserGym environment to use for evaluation
+#browsergym_eval_env = ""
+
 #################################### Security ###################################
 # Configuration for security features
 ##############################################################################

diff --git a/openhands/core/config.py b/openhands/core/config.py
@@ -123,11 +123,13 @@ class AgentConfig:
     """Configuration for the agent.
 
     Attributes:
+        micro_agent_name: The name of the micro agent to use for this agent.
         memory_enabled: Whether long-term memory (embeddings) is enabled.
         memory_max_threads: The maximum number of threads indexing at the same time for embeddings.
         llm_config: The name of the llm config to use. If specified, this will override global llm config.
     """
 
+    micro_agent_name: str | None = None
     memory_enabled: bool = False
     memory_max_threads: int = 2
     llm_config: str | None = None

diff --git a/openhands/core/exceptions.py b/openhands/core/exceptions.py
@@ -72,3 +72,8 @@ def __init__(self, message='Failed to retrieve action from LLM response'):
 class UserCancelledError(Exception):
     def __init__(self, message='User cancelled the request'):
         super().__init__(message)
+
+
+class MicroAgentValidationError(Exception):
+    """Raised when a micro agent definition fails validation."""
+
+    def __init__(self, message='Micro agent validation failed'):
+        super().__init__(message)
diff --git a/openhands/utils/microagent.py b/openhands/utils/microagent.py
@@ -0,0 +1,44 @@
+import os
+
+import frontmatter
+import pydantic
+
+from openhands.controller.agent import Agent
+from openhands.core.exceptions import MicroAgentValidationError
+from openhands.core.logger import openhands_logger as logger
+
+
+class MicroAgentMetadata(pydantic.BaseModel):
+    """Front-matter metadata declared at the top of a micro agent markdown file."""
+
+    # Name of the micro agent.
+    name: str
+    # Name of the agent this micro agent is based on.
+    agent: str
+    # Maps each required environment variable to the instruction shown when it
+    # is missing. Optional: a micro agent that needs no environment variables
+    # may omit the key entirely.
+    require_env_var: dict[str, str] = pydantic.Field(default_factory=dict)
+
+
+class MicroAgent:
+    """A micro agent loaded from a markdown file with a front-matter header.
+
+    The file is parsed with ``frontmatter``: the header is validated against
+    ``MicroAgentMetadata`` and the markdown body is exposed through ``content``.
+
+    Args:
+        path: Path of the micro agent markdown file.
+
+    Raises:
+        FileNotFoundError: If ``path`` does not exist.
+        MicroAgentValidationError: If the base agent cannot be resolved or a
+            required environment variable is not set.
+    """
+
+    def __init__(self, path: str):
+        self.path = path
+        if not os.path.exists(path):
+            raise FileNotFoundError(f'Micro agent file {path} is not found')
+        # Read as UTF-8 explicitly so parsing does not depend on the
+        # platform's default encoding.
+        with open(path, 'r', encoding='utf-8') as file:
+            self._loaded = frontmatter.load(file)
+        self._content = self._loaded.content
+        self._metadata = MicroAgentMetadata(**self._loaded.metadata)
+        self._validate_micro_agent()
+
+    @property
+    def content(self) -> str:
+        """The markdown body of the micro agent file, without the front matter."""
+        return self._content
+
+    def _validate_micro_agent(self) -> None:
+        """Check that the base agent resolves and required env vars are set."""
+        logger.info(
+            f'Loading and validating micro agent [{self._metadata.name}] based on [{self._metadata.agent}]'
+        )
+        # Make sure the agent is registered. Raise explicitly instead of using
+        # `assert`, which is stripped when Python runs with -O.
+        agent_cls = Agent.get_cls(self._metadata.agent)
+        if agent_cls is None:
+            raise MicroAgentValidationError(
+                f'Agent [{self._metadata.agent}] required by micro agent [{self._metadata.name}] is not registered.'
+            )
+        # Make sure the environment variables are set
+        for env_var, instruction in self._metadata.require_env_var.items():
+            if env_var not in os.environ:
+                raise MicroAgentValidationError(
+                    f'Environment variable [{env_var}] is required by micro agent [{self._metadata.name}] but not set. {instruction}'
+                )
diff --git a/openhands/utils/prompt.py b/openhands/utils/prompt.py
@@ -2,6 +2,8 @@
 
 from jinja2 import Template
 
+from openhands.utils.microagent import MicroAgent
+
 
 class PromptManager:
     """
@@ -14,23 +16,21 @@ class PromptManager:
     Attributes:
         prompt_dir (str): Directory containing prompt templates.
         agent_skills_docs (str): Documentation of agent skills.
-        micro_agent (str | None): Content of the micro-agent definition file, if specified.
+        micro_agent (MicroAgent | None): Micro-agent, if specified.
     """
 
     def __init__(
         self,
         prompt_dir: str,
         agent_skills_docs: str,
-        micro_agent_name: str | None = None,
+        micro_agent: MicroAgent | None = None,
     ):
         self.prompt_dir: str = prompt_dir
         self.agent_skills_docs: str = agent_skills_docs
 
         self.system_template: Template = self._load_template('system_prompt')
         self.user_template: Template = self._load_template('user_prompt')
-        self.micro_agent: str | None = (
-            self._load_micro_agent(micro_agent_name) if micro_agent_name else None
-        )
+        self.micro_agent: MicroAgent | None = micro_agent
 
     def _load_template(self, template_name: str) -> Template:
         template_path = os.path.join(self.prompt_dir, f'{template_name}.j2')
@@ -39,15 +39,6 @@ def _load_template(self, template_name: str) -> Template:
         with open(template_path, 'r') as file:
             return Template(file.read())
 
-    def _load_micro_agent(self, micro_agent_name: str) -> str:
-        micro_agent_path = os.path.join(self.prompt_dir, f'micro/{micro_agent_name}.md')
-        if not os.path.exists(micro_agent_path):
-            raise FileNotFoundError(
-                f'Micro agent file {micro_agent_path} for {micro_agent_name} is not found'
-            )
-        with open(micro_agent_path, 'r') as file:
-            return file.read()
-
     @property
     def system_message(self) -> str:
         rendered = self.system_template.render(
@@ -66,5 +57,7 @@ def initial_user_message(self) -> str:
         These additional context will convert the current generic agent
         into a more specialized agent that is tailored to the user's task.
         """
-        rendered = self.user_template.render(micro_agent=self.micro_agent)
+        rendered = self.user_template.render(
+            micro_agent=self.micro_agent.content if self.micro_agent else None
+        )
         return rendered.strip()
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -47,6 +47,7 @@ tree-sitter = "0.21.3"
 bashlex = "^0.18"
 pyjwt = "^2.9.0"
 dirhash = "*"
+python-frontmatter = "^1.1.0"
 python-docx = "*"
 PyPDF2 = "*"
 python-pptx = "*"
@@ -83,6 +84,7 @@ reportlab = "*"
 [tool.coverage.run]
 concurrency = ["gevent"]
 
+
 [tool.poetry.group.runtime.dependencies]
 jupyterlab = "*"
 notebook = "*"
@@ -113,6 +115,7 @@ ignore = ["D1"]
 [tool.ruff.lint.pydocstyle]
 convention = "google"
 
+
 [tool.poetry.group.evaluation.dependencies]
 streamlit = "*"
 whatthepatch = "*"