mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Feat] update gsm8k and math agent config (#652)
* [Feat] update gsm8k and math agent config * minor fix
This commit is contained in:
parent
a331c9abfd
commit
9eb5cadcac
89
configs/datasets/math/math_agent_gen_861b4f.py
Normal file
89
configs/datasets/math/math_agent_gen_861b4f.py
Normal file
@ -0,0 +1,89 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import AgentInferencer
|
||||
from opencompass.datasets import (
|
||||
MATHDataset, MATHAgentEvaluator, math_postprocess
|
||||
)
|
||||
# uses the PAL format, but it does not perform well
|
||||
math_reader_cfg = dict(input_columns=['problem'], output_column='solution')
|
||||
|
||||
math_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=dict(
|
||||
round=[
|
||||
# # ################################### NEW SHOT ###################################
|
||||
dict(role='HUMAN', prompt='Find the coefficient of $x^3$ when $3(x^2 - x^3+x) +3(x +2x^3- 3x^2 + 3x^5+x^3) -5(1+x-4x^3 - x^2)$ is simplified.'),
|
||||
dict(role='BOT', prompt="""Tool:PythonInterpreter\nTool Input:from sympy import symbols, simplify
|
||||
|
||||
def solution():
|
||||
x = symbols('x')
|
||||
expr = 3*(x**2 - x**3 + x) + 3*(x + 2*x**3 - 3*x**2 + 3*x**5 + x**3) - 5*(1 + x - 4*x**3 - x**2)
|
||||
simplified_expr = simplify(expr)
|
||||
|
||||
x3_coefficient = simplified_expr.as_coefficients_dict()[x**3]
|
||||
result = x3_coefficient
|
||||
return result"""),
|
||||
dict(role='SYSTEM', prompt='Response:26'),
|
||||
dict(role='BOT', prompt='FinalAnswer: The final answer is $26$. I hope it is correct.'),
|
||||
dict(role='HUMAN', prompt='The surface area of a sphere with radius $r$ is $4\pi r^2$. Including the area of its circular base, what is the total surface area of a hemisphere with radius 6 cm? Express your answer in terms of $\pi$.'),
|
||||
dict(role='BOT', prompt="""Tool:PythonInterpreter\nTool Input:import math
|
||||
|
||||
def solution():
|
||||
radius = 6
|
||||
|
||||
# Surface area of the hemisphere
|
||||
hemisphere_area = 2 * math.pi * radius**2
|
||||
|
||||
# Area of the circular base
|
||||
base_area = math.pi * radius**2
|
||||
|
||||
# Total surface area
|
||||
total_surface_area = hemisphere_area + base_area
|
||||
|
||||
# Formatting the result in LaTeX
|
||||
result = r'{}\pi'.format(total_surface_area / math.pi)
|
||||
return result"""),
|
||||
dict(role='SYSTEM', prompt='Response:108.0\\pi'),
|
||||
dict(role='BOT', prompt='FinalAnswer: The final answer is $108.0\pi$. I hope it is correct.'),
|
||||
dict(role='HUMAN', prompt='Monica tosses a fair 6-sided die. If the roll is a prime number, then she wins that amount of dollars (so that, for example, if she rolls 3, then she wins 3 dollars). If the roll is composite, she wins nothing. Otherwise, she loses 3 dollars. What is the expected value of her winnings on one die toss? Express your answer as a dollar value to the nearest cent.'),
|
||||
dict(role='BOT', prompt="""Tool:PythonInterpreter\nTool Input:def solution():
|
||||
# Probabilities of each outcome
|
||||
prime_prob = 1 / 6
|
||||
composite_prob = 1 / 3
|
||||
otherwise_prob = 1 / 6
|
||||
|
||||
# Expected value of each outcome
|
||||
prime_expected_value = (2 * prime_prob) + (3 * prime_prob) + (5 * prime_prob)
|
||||
composite_expected_value = 0 * composite_prob
|
||||
otherwise_expected_value = -3 * otherwise_prob
|
||||
|
||||
# Total expected value
|
||||
total_expected_value = prime_expected_value + composite_expected_value + otherwise_expected_value
|
||||
|
||||
# Dollar value to the nearest cent
|
||||
result = "{:.2f}".format(total_expected_value)
|
||||
return result"""),
|
||||
dict(role='SYSTEM', prompt='Response:1.17'),
|
||||
dict(role='BOT', prompt='FinalAnswer: The final answer is $1.17$. I hope it is correct.'),
|
||||
dict(role='HUMAN', prompt='{problem}'),
|
||||
])),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=AgentInferencer),
|
||||
)
|
||||
|
||||
math_eval_cfg = dict(
|
||||
evaluator=dict(type=MATHAgentEvaluator),
|
||||
pred_postprocessor=dict(type=math_postprocess),
|
||||
)
|
||||
|
||||
math_datasets = [
|
||||
dict(
|
||||
abbr='math',
|
||||
type=MATHDataset,
|
||||
path='./data/math/math.json',
|
||||
reader_cfg=math_reader_cfg,
|
||||
infer_cfg=math_infer_cfg,
|
||||
eval_cfg=math_eval_cfg,
|
||||
)
|
||||
]
|
@ -49,9 +49,10 @@ def gsm8k_postprocess(text: str) -> str:
|
||||
break
|
||||
ret1 = ''
|
||||
for i in range(len(ret)):
|
||||
if ret[i].isdigit():
|
||||
# deal with potential float number
|
||||
if ret[i].isdigit() or ret[i] == '.':
|
||||
ret1 += ret[i]
|
||||
return ret1
|
||||
return ret1.strip('.')
|
||||
|
||||
|
||||
class Gsm8kEvaluator(BaseEvaluator):
|
||||
@ -87,15 +88,23 @@ class Gsm8kAgentEvaluator(BaseEvaluator):
|
||||
def __init__(self, action: str = 'PythonInterpreter'):
|
||||
self.action = action
|
||||
|
||||
def is_equal(self, pred, refer):
    """Return whether the predicted answer matches the reference answer.

    Two answers match when they are equal as given, or when both parse as
    numbers that differ by less than ``1e-6``.

    Args:
        pred: Predicted final answer (typically a string).
        refer: Reference answer (typically a string).

    Returns:
        bool: True if the answers match, False otherwise.
    """
    if pred == refer:
        return True
    try:
        # Parse the reference as a float too: ``int('18.0')`` raises, which
        # would make numerically equal answers compare as different.
        return abs(float(pred) - float(refer)) < 1e-6
    except (TypeError, ValueError, OverflowError):
        # Non-numeric (or missing) answers can only match by equality.
        return False
|
||||
|
||||
def soft_equal(self, pred, refer, step):
    """Return whether a step's tool result numerically matches the reference.

    The text found at ``step['result']['text']`` is compared with ``refer``;
    they match when both parse as numbers that differ by less than ``1e-6``.

    Args:
        pred: Final prediction. Unused here; kept so the signature stays
            compatible with existing callers.
        refer: Reference answer (typically a string).
        step: Mapping expected to hold the tool output under
            ``step['result']['text']``.

    Returns:
        bool: True if the tool output matches the reference, False if it
        does not match, is missing, or is not numeric.
    """
    try:
        soft_pred = step['result']['text']
        return abs(float(soft_pred) - float(refer)) < 1e-6
    except (KeyError, IndexError, TypeError, ValueError, OverflowError):
        # The result might not exist, or its text cannot be converted to a
        # float. Nothing is printed here: ``soft_pred`` is unbound when the
        # lookup itself fails, so referencing it would raise again.
        return False
|
||||
|
||||
def get_action(self, step):
|
||||
@ -114,7 +123,7 @@ class Gsm8kAgentEvaluator(BaseEvaluator):
|
||||
total = len(references)
|
||||
for pred, refer, step in zip(predictions, references, steps):
|
||||
# if final answer right
|
||||
if pred == refer:
|
||||
if self.is_equal(pred, refer):
|
||||
if self.get_action(step):
|
||||
final_scope += 1
|
||||
else:
|
||||
|
@ -49,6 +49,9 @@ class HuggingFace(BaseModel):
|
||||
use_fastchat_template (str, optional): Whether to use fastchat to get
|
||||
the conversation template. If True, fastchat needs to be
|
||||
implemented first. Defaults to False.
|
||||
end_str (str, optional): Whether to trim generated strings with end_str
|
||||
if the model has special ending strings that are not handled well.
|
||||
Defaults to None.
|
||||
|
||||
Note:
|
||||
About ``extract_pred_after_decode``: Commonly, we should extract the
|
||||
@ -72,7 +75,8 @@ class HuggingFace(BaseModel):
|
||||
batch_padding: bool = False,
|
||||
pad_token_id: Optional[int] = None,
|
||||
mode: str = 'none',
|
||||
use_fastchat_template: bool = False):
|
||||
use_fastchat_template: bool = False,
|
||||
end_str: Optional[str] = None):
|
||||
super().__init__(path=path,
|
||||
max_seq_len=max_seq_len,
|
||||
tokenizer_only=tokenizer_only,
|
||||
@ -96,6 +100,7 @@ class HuggingFace(BaseModel):
|
||||
peft_path=peft_path)
|
||||
self.generation_kwargs = generation_kwargs
|
||||
self.use_fastchat_template = use_fastchat_template
|
||||
self.end_str = end_str
|
||||
|
||||
def _load_tokenizer(self, path: str, tokenizer_path: Optional[str],
|
||||
tokenizer_kwargs: dict):
|
||||
@ -266,6 +271,8 @@ class HuggingFace(BaseModel):
|
||||
token[len_:] for token, len_ in zip(decodeds, prompt_lens)
|
||||
]
|
||||
|
||||
if self.end_str:
|
||||
decodeds = [token.split(self.end_str)[0] for token in decodeds]
|
||||
return decodeds
|
||||
|
||||
def _single_generate(self, inputs: List[str], max_out_len: int,
|
||||
@ -329,6 +336,8 @@ class HuggingFace(BaseModel):
|
||||
token[len_:] for token, len_ in zip(decodeds, prompt_lens)
|
||||
]
|
||||
|
||||
if self.end_str:
|
||||
decodeds = [token.split(self.end_str)[0] for token in decodeds]
|
||||
return decodeds
|
||||
|
||||
def get_logits(self, inputs: List[str]):
|
||||
|
Loading…
Reference in New Issue
Block a user