[Feat] minor update agent related (#839)

* [Feat] update cibench

* [Feat] Support CIBench

* [Feat] Support CIBench

* [Feat] Support CIBench

* [Feat] Support CIBench
This commit is contained in:
Hubert 2024-01-26 14:15:51 +08:00 committed by GitHub
parent 77be07dbb5
commit 4aa74565e2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 286 additions and 11 deletions

View File

@ -0,0 +1,96 @@
from mmengine.config import read_base
from opencompass.lagent.actions.ipython_interpreter import IPythonInterpreter
from opencompass.lagent.agents.react import CIReAct, ReActProtocol
from opencompass.models.lagent import CodeAgent
from opencompass.models.openai_api import OpenAI
from opencompass.partitioners import SizePartitioner
from opencompass.runners import LocalRunner
from opencompass.tasks import OpenICLInferTask
with read_base():
from .datasets.CIBench.CIBench_template_gen_e6b12a import \
cibench_datasets as datasets
# Prompt text wired into the protocol below as `force_stop`; the protocol
# appends it to the conversation when the agent is forced to answer.
FORCE_STOP_PROMPT_EN = """You should directly give results based on history information."""
# ReAct instruction template, passed below as the protocol's `call_protocol`.
# The `{tool_description}`, `{action_names}`, `{thought}`, `{action}`,
# `{action_input}` and `{response}` placeholders are filled in with
# `str.format` by the protocol's `format` method, so any literal braces added
# to this text would have to be escaped.
# The template ends with one worked example (a single IPythonInterpreter call).
FEWSHOT_INSTRUCTION = """\
You are an assistant who can utilize external tools.
{tool_description}
To use a tool, please response with the following format:
```
{thought} Think what you need to solve, do you need to use tools?
{action} The tool name, should be one of [{action_names}].
{action_input} The input to the tool that you want to use.
```
The tool will give you response after your response using the following format:
```
{response} the results after call the tool.
```
Therefore DO NOT generate tool response by yourself.
Also please follow the guidelines:
1. Always use code interpreter to solve the problem.
2. The generated codes should always in a markdown code block format.
3. The generated codes will be executed in an ipython manner and the results will be cached.
4. Your responded code should always be simple and only solves the problem in current step.
For example:
File url: `xxxx`
### Step 1. Load the dataset from the url into a pandas DataFrame named `df`.
{thought} We should use `pandas` to solve this step.
{action} IPythonInterpreter
{action_input} ```python
import pandas as pd
url = "xxxx"
data = pd.read_csv(url)
```
{response} The code is succeed without any outputs.
Let us begin from here!
"""
# Description string handed to the IPythonInterpreter action below.
# NOTE(review): presumably surfaced to the model through `{tool_description}`
# - confirm against the action executor.
IPYTHON_INTERPRETER_DESCRIPTION = '''\
It can run Python code in a manner as jupyter notebook. The code must be a valid code that contains only python method.'''
# Models to evaluate: a single code-interpreter agent (CIReAct wrapped by
# CodeAgent) driven by an OpenAI chat model.
models = [
dict(
abbr='gpt-3.5-code',
type=CodeAgent,
agent_type=CIReAct,
# NOTE(review): presumably the maximum number of LLM turns per question,
# with the force-stop prompt used on the last one - confirm in CIReAct.
max_turn=3,
# Backing LLM configuration.
llm=dict(
type=OpenAI,
path='gpt-3.5-turbo',
# NOTE(review): 'ENV' presumably means "read the API key from the
# environment" - confirm in the OpenAI model wrapper.
key='ENV',
query_per_second=1,
max_seq_len=4096,
),
# Tools available to the agent: only the IPython interpreter.
actions=[
dict(type=IPythonInterpreter,
description=IPYTHON_INTERPRETER_DESCRIPTION,
user_data_dir='./data/cibench_dataset/datasources')
],
# Prompt protocol: instruction template, force-stop prompt and the marker
# used for the final answer.
protocol=dict(
type=ReActProtocol,
call_protocol=FEWSHOT_INSTRUCTION,
force_stop=FORCE_STOP_PROMPT_EN,
finish=dict(role='FINISH', begin='Final Answer:', end='\n'),
),
batch_size=1,
# Role handling: with all three settings below, no `system` role message is
# produced by the agent and consecutive same-role messages are concatenated.
use_system_role=False, # send tool responses and the force-stop prompt with the `user` role instead of `system`
first_system_role=False, # send the first instruction prompt with the `user` role as well
merge_adjacent_role=True, # merge adjacent messages that share the same role
),
]
# Inference settings: tasks are split by SizePartitioner (max_task_size=1000)
# and executed by a LocalRunner running OpenICLInferTask with up to 16 workers.
infer = dict(
partitioner=dict(type=SizePartitioner, max_task_size=1000),
runner=dict(
type=LocalRunner,
max_num_workers=16,
task=dict(type=OpenICLInferTask)),
)

View File

@ -105,10 +105,11 @@ def load_experiment_template(file: str) -> dict:
for _output in cell['outputs']:
if _output['output_type'] == 'display_data':
assert not output_flag
if 'image/png' in _output['data']:
output_flag = True
tags.append('vis')
outputs.append(_output['data']['image/png'])
for _output in cell['outputs']:
for _output in cell['outputs'][::-1]:
if output_flag:
break
if _output['output_type'] == 'stream' and _output[
@ -290,11 +291,26 @@ class CIBenchEvaluator(BaseEvaluator):
if action['result']:
try:
pred = action['result']['text']
match = re.search('execute_result:\n\n```\n(.*?)\n```',
match_exec = re.search(
'execute_result:\n\n```\n(.*?)\n```', pred,
re.DOTALL)
match_stdout = re.search('stdout:\n\n```\n(.*?)\n```',
pred, re.DOTALL)
# get pred result from execute_result by default
# else stdout
if match_exec and match_stdout:
match = match_exec
elif match_exec:
match = match_exec
elif match_stdout:
match = match_stdout
else:
match = None
if match:
out = match.group(1)
return out.strip() == target.strip()
score = (out.strip() == target.strip()
or target.strip() in out.strip())
return score
except Exception:
return False
# Fall back to False

View File

@ -1,7 +1,136 @@
from lagent.agents.react import ReAct
import copy
from typing import Dict, List
from lagent.actions import ActionExecutor
from lagent.agents.react import ReAct as _ReAct
from lagent.agents.react import ReActProtocol as _ReActProtocol
from lagent.schema import ActionReturn, ActionStatusCode, AgentReturn
class ReActProtocol(_ReActProtocol):
    """ReAct prompt protocol with configurable roles and optional merging.

    On top of the parent protocol this class keeps three settings that the
    owning agent may overwrite after construction:

    * ``system_role``: role used for the force-stop prompt.
    * ``first_system_role``: role used for the leading instruction prompt.
    * ``merge_adjacent_role``: when True, consecutive messages that share
      a role are concatenated into a single message.
    """

    def __init__(self, **kwargs) -> None:
        super().__init__(**kwargs)
        # defaults to system
        self.system_role = 'system'
        self.first_system_role = 'system'
        self.merge_adjacent_role = False

    def format(self,
               chat_history: List[Dict],
               inner_step: List[Dict],
               action_executor: ActionExecutor,
               force_stop: bool = False) -> list:
        """Generate the ReAct format prompt.

        Args:
            chat_history (List[Dict]): The history log in previous runs.
            inner_step (List[Dict]): The log in the current run.
            action_executor (ActionExecutor): the action manager to
                execute actions.
            force_stop (boolean): whether force the agent to give responses
                under pre-defined turns.

        Returns:
            List[Dict]: ReAct format prompt. When ``merge_adjacent_role`` is
            enabled the returned messages are copies, so the dicts in
            ``chat_history`` and ``inner_step`` are never modified.
        """
        call_protocol = self.call_protocol.format(
            tool_description=action_executor.get_actions_info(),
            action_names=action_executor.action_names(),
            thought=self.thought['begin'],
            action=self.action['begin'],
            action_input=self.action_input['begin'],
            response=self.response['begin'],
            finish=self.finish['begin'],
        )
        formatted = []
        formatted.append(
            dict(role=self.first_system_role, content=call_protocol))
        formatted += chat_history
        formatted += inner_step
        if force_stop:
            formatted.append(
                dict(role=self.system_role, content=self.force_stop))

        if self.merge_adjacent_role and formatted:
            # Merge into shallow copies. `formatted` holds the very same
            # dict objects as the caller's history lists, so appending to
            # them in place would corrupt that history and make every
            # later call re-append the same text.
            merged = [dict(formatted[0])]
            for d in formatted[1:]:
                if d['role'] == merged[-1]['role']:
                    # Same role as the previous message: concatenate.
                    merged[-1]['content'] += d['content']
                else:
                    # Role changed: start a new (copied) message.
                    merged.append(dict(d))
            return merged

        return formatted
class ReAct(_ReAct):
    """ReAct agent whose prompt roles can be switched to ``user``.

    Args:
        use_system_role (bool): If True, tool responses and the force-stop
            prompt use the ``system`` role, otherwise the ``user`` role.
        first_system_role (bool): If True (or if ``use_system_role`` is
            True) the leading instruction prompt uses the ``system`` role,
            otherwise the ``user`` role.
        merge_adjacent_role (bool): Stored on the protocol as its
            ``merge_adjacent_role`` setting.
        **kwargs: Forwarded unchanged to the parent constructor.
    """

    def __init__(self,
                 use_system_role: bool = True,
                 first_system_role: bool = True,
                 merge_adjacent_role: bool = False,
                 **kwargs) -> None:
        super().__init__(**kwargs)
        self.system_role = 'system' if use_system_role else 'user'
        # The first prompt stays `system` whenever either flag asks for it.
        leading_role = ('system' if use_system_role or first_system_role
                        else 'user')
        self._protocol.first_system_role = leading_role
        self._protocol.system_role = self.system_role
        self._protocol.merge_adjacent_role = merge_adjacent_role

    def chat(self, message: str) -> AgentReturn:
        """Answer ``message`` with at most ``max_turn`` LLM/tool rounds.

        Args:
            message (str): The user message for this round.

        Returns:
            AgentReturn: Holds the executed actions, the final response (a
            fixed apology if no finish action was produced) and a deep copy
            of the inner steps.
        """
        # Stored history may still carry `system`; map it to the
        # configured role before building prompts.
        for entry in self._session_history:
            if entry['role'] == 'system':
                entry['role'] = self.system_role

        self._inner_history = [dict(role='user', content=message)]
        agent_return = AgentReturn()

        for turn in range(self.max_turn):
            is_last_turn = turn == self.max_turn - 1
            prompt = self._protocol.format(
                chat_history=self.session_history,
                inner_step=self._inner_history,
                action_executor=self._action_executor,
                force_stop=is_last_turn)
            response = self._llm.generate_from_template(prompt, 512)
            self._inner_history.append(
                dict(role='assistant', content=response))

            thought, action, action_input = self._protocol.parse(
                response, self._action_executor)
            action_return: ActionReturn = self._action_executor(
                action, action_input)
            action_return.thought = thought
            agent_return.actions.append(action_return)

            if action_return.type == self._action_executor.finish_action.name:
                agent_return.response = action_return.result['text']
                break

            # Not finished: feed the tool output back for the next turn.
            self._inner_history.append(
                dict(role=self.system_role,
                     content=self._protocol.format_response(action_return)))
        else:
            # Loop ended without a finish action.
            agent_return.response = 'Sorry that I cannot answer your question.'

        agent_return.inner_steps = copy.deepcopy(self._inner_history)
        # only append the user and final response
        self._session_history.append(dict(role='user', content=message))
        self._session_history.append(
            dict(role='assistant', content=agent_return.response))
        return agent_return
class CIReAct(ReAct):
"""Code Interpreter version of ReAct. The success state is different from
ReAct.
@ -27,6 +156,9 @@ class CIReAct(ReAct):
b.reset()
def chat(self, message: str) -> AgentReturn:
for hist in self._session_history:
if hist['role'] == 'system':
hist['role'] = self.system_role
self._inner_history = []
# append the user message for session history
self._session_history.append(dict(role='user', content=message))
@ -54,14 +186,14 @@ class CIReAct(ReAct):
dict(role='assistant', content=response))
self._session_history.append(
dict(
role='system',
role=self.system_role,
content=self._protocol.format_response(action_return)))
agent_return.response = action_return.result['text']
return agent_return
elif action_return.type == self._action_executor.invalid_action.name: # noqa
action_return.errmsg = 'The action is invalid, please check the action name.' # noqa
self._inner_history.append(
dict(role='system',
dict(role=self.system_role,
content=self._protocol.format_response(action_return)))
if turn == self.max_turn - 1:
force_stop = True

View File

@ -42,6 +42,26 @@ class LagentAgent:
def set_history(self, history):
    """Replace the wrapped agent's session history.

    Args:
        history: The history to install. It is deep-copied, so later
            changes to the stored history do not affect the caller's object.
    """
    snapshot = deepcopy(history)
    self.agent._session_history = snapshot
def gt_response(self, prompt):
    """Build the history entries for one ground-truth assistant turn.

    Args:
        prompt: The gold content of the assistant turn. For a ``CIReAct``
            agent it is treated as Python code.

    Returns:
        list: For a ``CIReAct`` agent, two messages: an ``assistant``
        message that wraps the gold code in an ``IPythonInterpreter`` call,
        and a ``system`` message with the formatted result of running that
        code through the agent's action executor. For any other agent, a
        single ``assistant`` message containing ``prompt`` unchanged.
    """
    # Agents other than CIReAct: the gold text is the assistant turn.
    if 'CIReAct' not in str(self.agent.__class__):
        return [dict(role='assistant', content=prompt)]

    gold = prompt
    prompt = f"""{self.agent._protocol.action['begin']} IPythonInterpreter
{self.agent._protocol.action_input['begin']} ```python\n{gold}\n```\n"""  # noqa
    # Run the gold code so the history carries its real output.
    executed = self.agent._action_executor(
        'IPythonInterpreter',
        dict(command=f"""```python\n{gold}\n```\n""", timeout=120),
    )
    assistant_turn = dict(role='assistant', content=prompt)
    tool_turn = dict(
        role='system',
        content=self.agent._protocol.format_response(executed))
    return [assistant_turn, tool_turn]
@property
def template_parser(self):
    """Return the template parser held by the wrapped agent's LLM."""
    llm = self.agent._llm
    return llm.template_parser

View File

@ -124,8 +124,15 @@ class AgentInferencer(ChatInferencer):
i for i, item in enumerate(chat) if item['role'] == 'assistant'
]
history = chat[:assistant_indices[0] - 1]
prev_idx = 0
for i in assistant_indices:
self.model.set_history(chat[:i - 1])
for j in range(prev_idx, i - 1):
if chat[j]['role'] == 'assistant':
history += self.model.gt_response(chat[j]['content'])
elif chat[j]['role'] == 'user':
history += [chat[j]]
self.model.set_history(history)
answer, steps, _ = self.model.chat(chat[i - 1]['content'])
output_handler.save_multiround_results(
origin_prompt=chat[i - 1]['content'],
@ -134,4 +141,6 @@ class AgentInferencer(ChatInferencer):
idx=index,
gold=chat[i]['content'],
)
history += [chat[i - 1]]
prev_idx = i
self.model.reset()

View File

@ -6,6 +6,8 @@ jupyter
jupyter_client
jupytext
lagent
lightgbm==4.1.0
networkx
scikit-image
sympy==1.12
tensorflow==2.14.0