mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
Mathbench update postprocess (#600)
* Update mathbench * Update mathbench
This commit is contained in:
parent
5e75e29711
commit
c9c5c5d92e
@ -1,4 +1,4 @@
|
||||
from mmengine.config import read_base
|
||||
|
||||
with read_base():
|
||||
from .mathbench_gen_10da90 import mathbench_datasets # noqa: F401, F403
|
||||
from .mathbench_gen_ad37c1 import mathbench_datasets # noqa: F401, F403
|
||||
|
@ -3,17 +3,17 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import CircularEvaluator, AccEvaluator
|
||||
from opencompass.datasets import MathBenchDataset, mathbench_postprocess
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
from opencompass.utils.text_postprocessors import first_option_postprocess
|
||||
|
||||
|
||||
single_choice_prompts = {
|
||||
"single_choice_cn_with_reasoning": "以下是一道关于数学的单项选择题,请你一步一步推理并得到最终的答案选项。回答格式为如下:\n答案选项:A、B、C、D中你认为正确的一个选项\n计算过程:根据题目得到选项答案的一步步过程\n请严格按照上面的格式回答问题,下面是你要回答的题目:\n{question}\n答案选项:",
|
||||
"single_choice_cn": "以下是一道关于数学的单项选择题,请你给出正确的答案选项。\n下面是你要回答的题目:\n{question}\n答案选项:",
|
||||
"single_choice_cn": "以下是一道关于数学的单项选择题,请你直接回答正确答案的选项序号。\n下面是你要回答的题目:\n{question}\n答案选项:",
|
||||
"single_choice_en_with_reasoning": "Here is a multiple-choice question about mathematics. Please provide the final answer option by step-by-step reasoning. Please answer in the following format:\nAnswer option: A, B, C, or D (the option you believe is correct)\nCalculation process: Step-by-step process to derive the answer option based on the question\nPlease strictly follow the above format to answer the question. Here is the question you need to answer:\n{question}\nAnswer option:",
|
||||
"single_choice_en": "Here is a multiple-choice question about mathematics. Please provide the correct answer option directly.\nHere is the question you need to answer:\n{question}\nAnswer option:",
|
||||
}
|
||||
|
||||
cloze_prompts={
|
||||
cloze_prompts = {
|
||||
"cloze_cn": [
|
||||
dict(role='HUMAN', prompt='Q: 林中有15棵树。林务工人员今天将在林中种植树木。完成后,将有21棵树。林务工人员今天种植了多少棵树?'),
|
||||
dict(role='BOT', prompt='A: 我们从15棵树开始。后来有21棵树。差值必定是他们种植的树木数量。所以,他们必须种植了21 - 15 = 6棵树。答案是 6\n'),
|
||||
@ -53,15 +53,13 @@ cloze_prompts={
|
||||
dict(role='BOT', prompt='A: She bought 5 bagels for $3 each. This means she spent 5 * $3 = $15 on the bagels. She had $23 in beginning, so now she has $23 - $15 = $8. The answer is 8.\n'),
|
||||
dict(role='HUMAN', prompt='Q: {question}'),
|
||||
dict(role='BOT', prompt='A: {answer}\n'),
|
||||
],
|
||||
}
|
||||
|
||||
]}
|
||||
|
||||
mathbench_sets = {
|
||||
'college': ['single_choice_cn', 'cloze_en'],
|
||||
'high': ['single_choice_cn', 'single_choice_en'],
|
||||
'middle': ['single_choice_cn'],
|
||||
'primary': ['cloze_cn'],
|
||||
'primary': ['cloze_cn']
|
||||
}
|
||||
|
||||
# Generate reasoning path if set True or just generate the final answer
|
||||
@ -75,10 +73,9 @@ mathbench_datasets = []
|
||||
for _split in list(mathbench_sets.keys()):
|
||||
for _name in mathbench_sets[_split]:
|
||||
mathbench_infer_cfg = dict(
|
||||
ice_template=dict(
|
||||
prompt_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=dict(
|
||||
begin="</E>",
|
||||
round=[
|
||||
dict(
|
||||
role="HUMAN",
|
||||
@ -86,15 +83,14 @@ for _split in list(mathbench_sets.keys()):
|
||||
),
|
||||
dict(role="BOT", prompt="{answer}")] if 'choice' in _name else cloze_prompts[_name],
|
||||
),
|
||||
ice_token="</E>",
|
||||
),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer, max_out_len=512,),
|
||||
inferencer=dict(type=GenInferencer, max_out_len=512),
|
||||
)
|
||||
|
||||
mathbench_eval_cfg = dict(
|
||||
evaluator=dict(type=CircularEvaluator if 'choice' in _name else AccEvaluator),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess) if 'single_choice' in _name else dict(type=mathbench_postprocess, name=_name))
|
||||
pred_postprocessor=dict(type=first_option_postprocess, options='ABCD') if 'single_choice' in _name else dict(type=mathbench_postprocess, name=_name))
|
||||
|
||||
mathbench_datasets.append(
|
||||
dict(
|
||||
@ -110,5 +106,3 @@ for _split in list(mathbench_sets.keys()):
|
||||
infer_cfg=mathbench_infer_cfg,
|
||||
eval_cfg=mathbench_eval_cfg,
|
||||
))
|
||||
|
||||
del _split, _name
|
18
configs/summarizers/mathbench.py
Normal file
18
configs/summarizers/mathbench.py
Normal file
@ -0,0 +1,18 @@
|
||||
# Summarizer configuration for the MathBench result table.
#
# ``dataset_abbrs`` gives the rows in display order. A bare string is a
# category heading; each two-item list is presumably a
# (dataset abbreviation, metric name) pair -- confirm against the summarizer
# implementation that consumes this config.
summarizer = dict(
    dataset_abbrs=[
        '######## MathBench Accuracy ########',  # category
        ['mathbench-college-single_choice_cn', 'acc_1'],
        ['mathbench-college-cloze_en', 'accuracy'],
        ['mathbench-high-single_choice_cn', 'acc_1'],
        ['mathbench-high-single_choice_en', 'acc_1'],
        ['mathbench-middle-single_choice_cn', 'acc_1'],
        ['mathbench-primary-cloze_cn', 'accuracy'],
        '######## MathBench CircularEval ########',  # category
        ['mathbench-college-single_choice_cn', 'perf_4'],
        ['mathbench-high-single_choice_cn', 'perf_4'],
        ['mathbench-high-single_choice_en', 'perf_4'],
        ['mathbench-middle-single_choice_cn', 'perf_4'],
    ],
    # Concatenate every list bound to a name ending in ``_summary_groups``
    # that is visible in this scope (none are defined in this file itself, so
    # the result is an empty list unless such names are injected).
    # The items are copied into a list first so the comprehension never walks
    # the live ``locals()`` mapping while its own loop variables are being
    # bound; iterating it directly can raise "dictionary changed size during
    # iteration" on Python 3.12+ when tracing is active.
    summary_groups=sum(
        [v for k, v in list(locals().items())
         if k.endswith("_summary_groups")], []),
)
|
@ -71,10 +71,15 @@ class MathBenchDataset(BaseDataset):
|
||||
else:
|
||||
question = entry['question'].strip(
|
||||
) + '\n' + get_number(entry['options'])
|
||||
data.append({
|
||||
info = {
|
||||
'question': question,
|
||||
'answer': entry['answer'].strip()
|
||||
})
|
||||
}
|
||||
# For PPL evaluation
|
||||
for i in range(4):
|
||||
info[chr(ord('A') +
|
||||
i)] = entry['options'][i].strip()
|
||||
data.append(info)
|
||||
|
||||
dataset = Dataset.from_list(data)
|
||||
return dataset
|
||||
@ -91,7 +96,7 @@ def mathbench_postprocess(text: str, name: str) -> str:
|
||||
ans = ans_line[1].strip()
|
||||
|
||||
output = re.sub(r'(\d),(\d)', r'\1\2', ans)
|
||||
numbers = re.findall(r'-?\d*\.?\d+|\d+', output)
|
||||
numbers = re.findall(r'-?\d*\.?/?\d+|\d+', output)
|
||||
if numbers:
|
||||
return numbers[-1]
|
||||
|
||||
|
@ -53,9 +53,12 @@ def first_option_postprocess(text: str, options: str) -> str:
|
||||
|
||||
patterns = [
|
||||
f'[Tt]he answer is [{options}]',
|
||||
f'[Tt]he correct answer is [{options}]',
|
||||
f'答案是(.*?)[{options}]',
|
||||
f'答案为(.*?)[{options}]',
|
||||
f'[Tt]he correct answer\s?(?:option)?\s?is [{options}]', # noqa
|
||||
f'答案(?:选项)?是(.*?)[{options}]',
|
||||
f'答案(?:选项)?为(.*?)[{options}]',
|
||||
f'答案(?:选项)?选(.*?)[{options}]',
|
||||
f'选项[{options}]是?正确',
|
||||
f'选项[{options}]为?正确',
|
||||
f'固选(.*?)[{options}]',
|
||||
f'答案应该是(.*?)[{options}]',
|
||||
f'(\s|^)[{options}][\s。,,\.$]', # noqa
|
||||
|
Loading…
Reference in New Issue
Block a user