[Feat] update gsm8k and math agent config (#652)

* [Feat] update gsm8k and math agent config

* minor fix
This commit is contained in:
Hubert 2023-12-01 15:08:38 +08:00 committed by GitHub
parent a331c9abfd
commit 9eb5cadcac
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 113 additions and 6 deletions

View File

@ -0,0 +1,89 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import AgentInferencer
from opencompass.datasets import (
MATHDataset, MATHAgentEvaluator, math_postprocess
)
# Uses the PAL format, but it does not perform well.
# Reader config: 'problem' is the input column (it matches the {problem}
# placeholder in the prompt template) and 'solution' is the output column.
math_reader_cfg = {
    'input_columns': ['problem'],
    'output_column': 'solution',
}
# Inference config: a three-shot agent dialogue followed by the real problem.
# Every shot has the same four turns:
#   HUMAN  - the question
#   BOT    - a "Tool:PythonInterpreter" call whose input defines solution()
#   SYSTEM - the "Response:" text shown for that call
#   BOT    - the "FinalAnswer:" sentence
# The last HUMAN turn is filled from the dataset's 'problem' column.
#
# NOTE: the prompts are ordinary (non-raw) strings because the BOT turns rely
# on '\n' being a real newline; literal backslashes (LaTeX '\pi') are therefore
# written as '\\' so that they are not treated as invalid escape sequences.
math_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                # ############################ SHOT 1 ############################
                dict(role='HUMAN', prompt='Find the coefficient of $x^3$ when $3(x^2 - x^3+x) +3(x +2x^3- 3x^2 + 3x^5+x^3) -5(1+x-4x^3 - x^2)$ is simplified.'),
                dict(role='BOT', prompt="""Tool:PythonInterpreter\nTool Input:from sympy import symbols, simplify
def solution():
    x = symbols('x')
    expr = 3*(x**2 - x**3 + x) + 3*(x + 2*x**3 - 3*x**2 + 3*x**5 + x**3) - 5*(1 + x - 4*x**3 - x**2)
    simplified_expr = simplify(expr)
    x3_coefficient = simplified_expr.as_coefficients_dict()[x**3]
    result = x3_coefficient
    return result"""),
                dict(role='SYSTEM', prompt='Response:26'),
                dict(role='BOT', prompt='FinalAnswer: The final answer is $26$. I hope it is correct.'),
                # ############################ SHOT 2 ############################
                dict(role='HUMAN', prompt='The surface area of a sphere with radius $r$ is $4\\pi r^2$. Including the area of its circular base, what is the total surface area of a hemisphere with radius 6 cm? Express your answer in terms of $\\pi$.'),
                dict(role='BOT', prompt="""Tool:PythonInterpreter\nTool Input:import math
def solution():
    radius = 6
    # Surface area of the hemisphere
    hemisphere_area = 2 * math.pi * radius**2
    # Area of the circular base
    base_area = math.pi * radius**2
    # Total surface area
    total_surface_area = hemisphere_area + base_area
    # Formatting the result in LaTeX
    result = r'{}\\pi'.format(total_surface_area / math.pi)
    return result"""),
                dict(role='SYSTEM', prompt='Response:108.0\\pi'),
                dict(role='BOT', prompt='FinalAnswer: The final answer is $108.0\\pi$. I hope it is correct.'),
                # ############################ SHOT 3 ############################
                dict(role='HUMAN', prompt='Monica tosses a fair 6-sided die. If the roll is a prime number, then she wins that amount of dollars (so that, for example, if she rolls 3, then she wins 3 dollars). If the roll is composite, she wins nothing. Otherwise, she loses 3 dollars. What is the expected value of her winnings on one die toss? Express your answer as a dollar value to the nearest cent.'),
                dict(role='BOT', prompt="""Tool:PythonInterpreter\nTool Input:def solution():
    # Probabilities of each outcome
    prime_prob = 1 / 6
    composite_prob = 1 / 3
    otherwise_prob = 1 / 6
    # Expected value of each outcome
    prime_expected_value = (2 * prime_prob) + (3 * prime_prob) + (5 * prime_prob)
    composite_expected_value = 0 * composite_prob
    otherwise_expected_value = -3 * otherwise_prob
    # Total expected value
    total_expected_value = prime_expected_value + composite_expected_value + otherwise_expected_value
    # Dollar value to the nearest cent
    result = "{:.2f}".format(total_expected_value)
    return result"""),
                dict(role='SYSTEM', prompt='Response:1.17'),
                dict(role='BOT', prompt='FinalAnswer: The final answer is $1.17$. I hope it is correct.'),
                # ######################## ACTUAL QUESTION #######################
                dict(role='HUMAN', prompt='{problem}'),
            ])),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=AgentInferencer),
)
# Evaluation config: names the evaluator class and the post-processing
# function that is applied to predictions.
math_eval_cfg = {
    'evaluator': {'type': MATHAgentEvaluator},
    'pred_postprocessor': {'type': math_postprocess},
}
# Dataset entries defined by this config: a single MATH entry that bundles
# the reader, inference and evaluation configs declared above.
math_datasets = [
    {
        'abbr': 'math',
        'type': MATHDataset,
        'path': './data/math/math.json',
        'reader_cfg': math_reader_cfg,
        'infer_cfg': math_infer_cfg,
        'eval_cfg': math_eval_cfg,
    },
]

View File

@ -49,9 +49,10 @@ def gsm8k_postprocess(text: str) -> str:
break break
ret1 = '' ret1 = ''
for i in range(len(ret)): for i in range(len(ret)):
if ret[i].isdigit(): # deal with potential float number
if ret[i].isdigit() or ret[i] == '.':
ret1 += ret[i] ret1 += ret[i]
return ret1 return ret1.strip('.')
class Gsm8kEvaluator(BaseEvaluator): class Gsm8kEvaluator(BaseEvaluator):
@ -87,15 +88,23 @@ class Gsm8kAgentEvaluator(BaseEvaluator):
def __init__(self, action: str = 'PythonInterpreter'): def __init__(self, action: str = 'PythonInterpreter'):
self.action = action self.action = action
def is_equal(self, pred, refer):
    """Return whether ``pred`` matches the reference answer ``refer``.

    The two match when they compare equal directly, or when ``pred``
    parsed with ``float()`` lies within 1e-6 of ``refer`` parsed with
    ``int()``.

    Args:
        pred: Predicted answer (presumably a string such as ``'18.0'``).
        refer: Reference answer; it must be accepted by ``int()`` for the
            numeric comparison to apply.

    Returns:
        bool: ``True`` on a match, otherwise ``False`` -- including when
        either value cannot be converted to a number.
    """
    try:
        return bool(pred == refer or abs(float(pred) - int(refer)) < 1e-6)
    except (TypeError, ValueError, OverflowError):
        # Missing or non-numeric answers are simply "not equal"; only the
        # conversion errors are swallowed so unrelated bugs still surface.
        return False
def soft_equal(self, pred, refer, step): def soft_equal(self, pred, refer, step):
try: try:
soft_pred = step['result']['text'] soft_pred = step['result']['text']
if str(int(float(soft_pred))) == refer: if abs(float(soft_pred) - int(refer)) < 1e-6:
return True return True
except Exception: except Exception:
# result might not exists # result might not exists
# text cannot convert to float # text cannot convert to float
print(pred, soft_pred, refer) pass
return False return False
def get_action(self, step): def get_action(self, step):
@ -114,7 +123,7 @@ class Gsm8kAgentEvaluator(BaseEvaluator):
total = len(references) total = len(references)
for pred, refer, step in zip(predictions, references, steps): for pred, refer, step in zip(predictions, references, steps):
# if final answer right # if final answer right
if pred == refer: if self.is_equal(pred, refer):
if self.get_action(step): if self.get_action(step):
final_scope += 1 final_scope += 1
else: else:

View File

@ -49,6 +49,9 @@ class HuggingFace(BaseModel):
use_fastchat_template (str, optional): Whether to use fastchat to get use_fastchat_template (str, optional): Whether to use fastchat to get
the conversation template. If True, fastchat needs to be the conversation template. If True, fastchat needs to be
implemented first. Defaults to False. implemented first. Defaults to False.
end_str (str, optional): Whether to trim generated strings with end_str
if the model has special ending strings that are not handled well.
Defaults to None.
Note: Note:
About ``extract_pred_after_decode``: Commonly, we should extract the About ``extract_pred_after_decode``: Commonly, we should extract the
@ -72,7 +75,8 @@ class HuggingFace(BaseModel):
batch_padding: bool = False, batch_padding: bool = False,
pad_token_id: Optional[int] = None, pad_token_id: Optional[int] = None,
mode: str = 'none', mode: str = 'none',
use_fastchat_template: bool = False): use_fastchat_template: bool = False,
end_str: Optional[str] = None):
super().__init__(path=path, super().__init__(path=path,
max_seq_len=max_seq_len, max_seq_len=max_seq_len,
tokenizer_only=tokenizer_only, tokenizer_only=tokenizer_only,
@ -96,6 +100,7 @@ class HuggingFace(BaseModel):
peft_path=peft_path) peft_path=peft_path)
self.generation_kwargs = generation_kwargs self.generation_kwargs = generation_kwargs
self.use_fastchat_template = use_fastchat_template self.use_fastchat_template = use_fastchat_template
self.end_str = end_str
def _load_tokenizer(self, path: str, tokenizer_path: Optional[str], def _load_tokenizer(self, path: str, tokenizer_path: Optional[str],
tokenizer_kwargs: dict): tokenizer_kwargs: dict):
@ -266,6 +271,8 @@ class HuggingFace(BaseModel):
token[len_:] for token, len_ in zip(decodeds, prompt_lens) token[len_:] for token, len_ in zip(decodeds, prompt_lens)
] ]
if self.end_str:
decodeds = [token.split(self.end_str)[0] for token in decodeds]
return decodeds return decodeds
def _single_generate(self, inputs: List[str], max_out_len: int, def _single_generate(self, inputs: List[str], max_out_len: int,
@ -329,6 +336,8 @@ class HuggingFace(BaseModel):
token[len_:] for token, len_ in zip(decodeds, prompt_lens) token[len_:] for token, len_ in zip(decodeds, prompt_lens)
] ]
if self.end_str:
decodeds = [token.split(self.end_str)[0] for token in decodeds]
return decodeds return decodeds
def get_logits(self, inputs: List[str]): def get_logits(self, inputs: List[str]):