Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add CodeActSWEAgent to remove browsing & github + improvements on agentskills #2105

Merged
merged 28 commits into from
May 30, 2024
Merged
Show file tree
Hide file tree
Changes from 27 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
e9d7889
update swe_bench prompt;
xingyaoww May 27, 2024
8ec58d2
upgrade agentskills and update testcases
xingyaoww May 27, 2024
1e58a12
update infer prompt
xingyaoww May 27, 2024
80c0a33
fix cwd
xingyaoww May 27, 2024
4aeb002
add icl for swebench
xingyaoww May 27, 2024
4f853e7
also log in_context_example to run infer
xingyaoww May 27, 2024
2a1cc9a
remove extra print
xingyaoww May 27, 2024
deef10b
change prompt to abs path
xingyaoww May 27, 2024
7783c10
update error message to include current file info
xingyaoww May 27, 2024
c2a284f
change cwd for jupyter if needed
xingyaoww May 27, 2024
604c8d9
update edit error message
xingyaoww May 27, 2024
851df73
update prompt
xingyaoww May 28, 2024
6e2736f
improve git get patch
xingyaoww May 28, 2024
a36f6f5
update hint string
xingyaoww May 28, 2024
cb23bdb
default to 50 turns
xingyaoww May 28, 2024
fa97e57
revert changes from codeact agent and create new CodeActSWEAgent
xingyaoww May 28, 2024
a98f15a
revert changes to codeact
xingyaoww May 28, 2024
a9dc3ce
revert instructions for run infer
xingyaoww May 28, 2024
a27b0bb
revert instructions for run infer
xingyaoww May 28, 2024
368f0b9
update README
xingyaoww May 28, 2024
e699f21
update max iter
xingyaoww May 28, 2024
3eaa6fb
add codeact swe agent
xingyaoww May 28, 2024
832a828
fix issue for CodeActSWEAgent
xingyaoww May 28, 2024
95eb048
allow specifying max iter in cmdline script
xingyaoww May 28, 2024
a4af937
stop printing
xingyaoww May 28, 2024
f6a6854
Update agenthub/codeact_swe_agent/README.md
xingyaoww May 28, 2024
fdce0ce
Fix prompt regression in jupyter plugin
li-boxuan May 29, 2024
9b499e5
Merge remote-tracking branch 'upstream/main' into xw/swe-bench-prompt
li-boxuan May 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions agenthub/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
SWE_agent,
browsing_agent,
codeact_agent,
codeact_swe_agent,
delegator_agent,
dummy_agent,
monologue_agent,
Expand All @@ -21,6 +22,7 @@
__all__ = [
'monologue_agent',
'codeact_agent',
'codeact_swe_agent',
'planner_agent',
'SWE_agent',
'delegator_agent',
Expand Down
24 changes: 15 additions & 9 deletions agenthub/codeact_agent/codeact_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,18 @@ def truncate_observation(observation: str, max_chars: int = 10_000) -> str:
)


# FIXME: We can tweak these two settings to create MicroAgents specialized toward different area
def get_system_message() -> str:
if ENABLE_GITHUB:
yufansong marked this conversation as resolved.
Show resolved Hide resolved
return f'{SYSTEM_PREFIX}\n{GITHUB_MESSAGE}\n\n{COMMAND_DOCS}\n\n{SYSTEM_SUFFIX}'
else:
return f'{SYSTEM_PREFIX}\n\n{COMMAND_DOCS}\n\n{SYSTEM_SUFFIX}'


def get_in_context_example() -> str:
return EXAMPLES


class CodeActAgent(Agent):
VERSION = '1.5'
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whether should we bump the version 1.6? The prompts keep the same, but change some logic.

"""
Expand Down Expand Up @@ -152,11 +164,8 @@ class CodeActAgent(Agent):
]
jupyter_kernel_init_code: str = 'from agentskills import *'

system_message: str = (
f'{SYSTEM_PREFIX}\n{GITHUB_MESSAGE}\n\n{COMMAND_DOCS}\n\n{SYSTEM_SUFFIX}'
if ENABLE_GITHUB
else f'{SYSTEM_PREFIX}\n\n{COMMAND_DOCS}\n\n{SYSTEM_SUFFIX}'
)
system_message: str = get_system_message()
in_context_example: str = f"Here is an example of how you can interact with the environment for task solving:\n{get_in_context_example()}\n\nNOW, LET'S START!"

def __init__(
self,
Expand Down Expand Up @@ -194,10 +203,7 @@ def step(self, state: State) -> Action:
"""
messages: list[dict[str, str]] = [
{'role': 'system', 'content': self.system_message},
{
'role': 'user',
'content': f"Here is an example of how you can interact with the environment for task solving:\n{EXAMPLES}\n\nNOW, LET'S START!",
},
{'role': 'user', 'content': self.in_context_example},
]

for prev_action, obs in state.history:
Expand Down
14 changes: 11 additions & 3 deletions agenthub/codeact_agent/prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,23 @@
"Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run."
)

SYSTEM_PREFIX = """A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
# ======= SYSTEM MESSAGE =======
MINIMAL_SYSTEM_PREFIX = """A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
The assistant can interact with an interactive Python (Jupyter Notebook) environment and receive the corresponding output when needed. The code should be enclosed using "<execute_ipython>" tag, for example:
<execute_ipython>
print("Hello World!")
</execute_ipython>
The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
"""

BROWSING_PREFIX = """The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>.
The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them."""
"""
PIP_INSTALL_PREFIX = """The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them."""

SYSTEM_PREFIX = MINIMAL_SYSTEM_PREFIX + BROWSING_PREFIX + PIP_INSTALL_PREFIX

GITHUB_MESSAGE = """To do any activities on GitHub, the assistant should use the token in the $GITHUB_TOKEN environment variable.
For instance, to push a local branch `my_branch` to the github repo `owner/repo`, the assistant can use the following four commands:
Expand All @@ -30,6 +36,8 @@
IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> or <execute_browse> instead of providing it.
"""


# ======= EXAMPLE MESSAGE =======
EXAMPLES = """
--- START OF EXAMPLE ---

Expand Down
7 changes: 7 additions & 0 deletions agenthub/codeact_swe_agent/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# CodeAct (SWE Edit Specialized)

This agent is an adaptation of the original [SWE Agent](https://swe-agent.com/) based on CodeAct using the `agentskills` library of OpenDevin.

Its intended use is **solving Github issues**.

It removes web-browsing and Github capability from the original CodeAct agent to avoid confusion to the agent.
yufansong marked this conversation as resolved.
Show resolved Hide resolved
5 changes: 5 additions & 0 deletions agenthub/codeact_swe_agent/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from opendevin.controller.agent import Agent

from .codeact_swe_agent import CodeActSWEAgent

Agent.register('CodeActSWEAgent', CodeActSWEAgent)
246 changes: 246 additions & 0 deletions agenthub/codeact_swe_agent/codeact_swe_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,246 @@
import re

from agenthub.codeact_swe_agent.prompt import (
COMMAND_DOCS,
MINIMAL_SYSTEM_PREFIX,
SWE_EXAMPLE,
SYSTEM_SUFFIX,
)
from opendevin.controller.agent import Agent
from opendevin.controller.state.state import State
from opendevin.events.action import (
Action,
AgentFinishAction,
BrowseInteractiveAction,
CmdRunAction,
IPythonRunCellAction,
MessageAction,
)
from opendevin.events.observation import (
BrowserOutputObservation,
CmdOutputObservation,
IPythonRunCellObservation,
)
from opendevin.llm.llm import LLM
from opendevin.runtime.plugins import (
AgentSkillsRequirement,
JupyterRequirement,
PluginRequirement,
)


def parse_response(response) -> str:
action = response.choices[0].message.content
for lang in ['bash', 'ipython', 'browse']:
if f'<execute_{lang}>' in action and f'</execute_{lang}>' not in action:
action += f'</execute_{lang}>'
return action


def action_to_str(action: Action) -> str:
if isinstance(action, CmdRunAction):
return f'{action.thought}\n<execute_bash>\n{action.command}\n</execute_bash>'
elif isinstance(action, IPythonRunCellAction):
return f'{action.thought}\n<execute_ipython>\n{action.code}\n</execute_ipython>'
elif isinstance(action, BrowseInteractiveAction):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I understand correctly the BrowseInteractiveAction should be remove, is this the case or I simply misunderstood?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In my understanding, this agent, CodeAct-SWE, is supposed to know how to browse and use github. It takes over those responsibilities, 'freeing' CodeAct.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep. I think this should be removed. CodeActSWE does not have the ability to browse.

Good catch... It is not relevant. But following the same reason above, I have already run a couple of experiments with this prompt - so hoping to keep it as is for now.

How about we start a separate PR to correct these typos, and at the same time bump the CodeActSWEAgent's version from v1.5 to v1.6 for better reproducibility.

^ For this reason, how about we starts a separate PR once this is merged to correct all these issues and bump the version?

return f'{action.thought}\n<execute_browse>\n{action.browser_actions}\n</execute_browse>'
elif isinstance(action, MessageAction):
return action.content
return ''


def get_action_message(action: Action) -> dict[str, str] | None:
if (
isinstance(action, BrowseInteractiveAction)
or isinstance(action, CmdRunAction)
or isinstance(action, IPythonRunCellAction)
or isinstance(action, MessageAction)
):
return {
'role': 'user' if action.source == 'user' else 'assistant',
'content': action_to_str(action),
}
return None


def get_observation_message(obs) -> dict[str, str] | None:
if isinstance(obs, CmdOutputObservation):
content = 'OBSERVATION:\n' + truncate_observation(obs.content)
content += (
f'\n[Command {obs.command_id} finished with exit code {obs.exit_code}]]'
)
return {'role': 'user', 'content': content}
elif isinstance(obs, IPythonRunCellObservation):
content = 'OBSERVATION:\n' + obs.content
# replace base64 images with a placeholder
splitted = content.split('\n')
for i, line in enumerate(splitted):
if '![image](data:image/png;base64,' in line:
splitted[i] = (
'![image](data:image/png;base64, ...) already displayed to user'
)
content = '\n'.join(splitted)
content = truncate_observation(content)
return {'role': 'user', 'content': content}
elif isinstance(obs, BrowserOutputObservation):
content = 'OBSERVATION:\n' + truncate_observation(obs.content)
return {'role': 'user', 'content': content}
return None


def truncate_observation(observation: str, max_chars: int = 10_000) -> str:
"""
Truncate the middle of the observation if it is too long.
"""
if len(observation) <= max_chars:
return observation
half = max_chars // 2
return (
observation[:half]
+ '\n[... Observation truncated due to length ...]\n'
+ observation[-half:]
)


def get_system_message() -> str:
return f'{MINIMAL_SYSTEM_PREFIX}\n\n{COMMAND_DOCS}\n\n{SYSTEM_SUFFIX}'


def get_in_context_example() -> str:
return SWE_EXAMPLE


class CodeActSWEAgent(Agent):
VERSION = '1.5'
li-boxuan marked this conversation as resolved.
Show resolved Hide resolved
"""
This agent is an adaptation of the original [SWE Agent](https://swe-agent.com/) based on CodeAct 1.5 using the `agentskills` library of OpenDevin.

It is intended use is **solving Github issues**.

It removes web-browsing and Github capability from the original CodeAct agent to avoid confusion to the agent.
"""

sandbox_plugins: list[PluginRequirement] = [
# NOTE: AgentSkillsRequirement need to go before JupyterRequirement, since
# AgentSkillsRequirement provides a lot of Python functions
# and it need to be initialized before Jupyter for Jupyter to use those functions.
AgentSkillsRequirement(),
JupyterRequirement(),
]
jupyter_kernel_init_code: str = 'from agentskills import *'

system_message: str = get_system_message()
in_context_example: str = f"Here is an example of how you can interact with the environment for task solving:\n{get_in_context_example()}\n\nNOW, LET'S START!"

def __init__(
self,
llm: LLM,
) -> None:
"""
Initializes a new instance of the CodeActAgent class.

Parameters:
- llm (LLM): The llm to be used by this agent
"""
super().__init__(llm)
self.reset()

def reset(self) -> None:
"""
Resets the CodeAct Agent.
"""
super().reset()

def step(self, state: State) -> Action:
"""
Performs one step using the CodeAct Agent.
This includes gathering info on previous steps and prompting the model to make a command to execute.

Parameters:
- state (State): used to get updated info and background commands

Returns:
- CmdRunAction(command) - bash command to run
- IPythonRunCellAction(code) - IPython code to run
- BrowseInteractiveAction(browsergym_command) - BrowserGym commands to run
- MessageAction(content) - Message action to run (e.g. ask for clarification)
- AgentFinishAction() - end the interaction
"""
messages: list[dict[str, str]] = [
{'role': 'system', 'content': self.system_message},
{'role': 'user', 'content': self.in_context_example},
]

for prev_action, obs in state.history:
action_message = get_action_message(prev_action)
if action_message:
messages.append(action_message)

obs_message = get_observation_message(obs)
if obs_message:
messages.append(obs_message)

latest_user_message = [m for m in messages if m['role'] == 'user'][-1]
if latest_user_message:
if latest_user_message['content'].strip() == '/exit':
return AgentFinishAction()
latest_user_message['content'] += (
f'\n\nENVIRONMENT REMINDER: You have {state.max_iterations - state.iteration} turns left to complete the task.'
)

response = self.llm.do_completion(
messages=messages,
stop=[
'</execute_ipython>',
'</execute_bash>',
'</execute_browse>',
],
temperature=0.0,
)

action_str: str = parse_response(response)
state.num_of_chars += sum(
len(message['content']) for message in messages
) + len(action_str)

if finish_command := re.search(r'<finish>.*</finish>', action_str, re.DOTALL):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this finish block in use? I cannot see instructions for the model to use <finish>...</finish>.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope! Let's remove it

thought = action_str.replace(finish_command.group(0), '').strip()
return AgentFinishAction(thought=thought)
if bash_command := re.search(
r'<execute_bash>(.*?)</execute_bash>', action_str, re.DOTALL
):
# remove the command from the action string to get thought
thought = action_str.replace(bash_command.group(0), '').strip()
# a command was found
command_group = bash_command.group(1).strip()

if command_group.strip() == 'exit':
return AgentFinishAction()
return CmdRunAction(command=command_group, thought=thought)
elif python_code := re.search(
r'<execute_ipython>(.*?)</execute_ipython>', action_str, re.DOTALL
):
# a code block was found
code_group = python_code.group(1).strip()
thought = action_str.replace(python_code.group(0), '').strip()
return IPythonRunCellAction(
code=code_group,
thought=thought,
kernel_init_code=self.jupyter_kernel_init_code,
)
elif browse_command := re.search(
r'<execute_browse>(.*)</execute_browse>', action_str, re.DOTALL
):
# BrowserGym actions was found
browse_actions = browse_command.group(1).strip()
thought = action_str.replace(browse_command.group(0), '').strip()
return BrowseInteractiveAction(
browser_actions=browse_actions, thought=thought
)
else:
# We assume the LLM is GOOD enough that when it returns pure natural language
# it want to talk to the user
return MessageAction(content=action_str, wait_for_response=True)

def search_memory(self, query: str) -> list[str]:
raise NotImplementedError('Implement this abstract method')
Loading