Mathbench update postprocess (#600)

* Update mathbench

* Update mathbench
liushz 2023-11-20 16:48:55 +08:00 committed by GitHub
parent 5e75e29711
commit c9c5c5d92e
5 changed files with 41 additions and 21 deletions

View File

@@ -1,4 +1,4 @@
from mmengine.config import read_base
with read_base():
from .mathbench_gen_10da90 import mathbench_datasets # noqa: F401, F403
from .mathbench_gen_ad37c1 import mathbench_datasets # noqa: F401, F403

View File

@@ -3,17 +3,17 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import CircularEvaluator, AccEvaluator
from opencompass.datasets import MathBenchDataset, mathbench_postprocess
from opencompass.utils.text_postprocessors import first_capital_postprocess
from opencompass.utils.text_postprocessors import first_option_postprocess
single_choice_prompts = {
"single_choice_cn_with_reasoning": "以下是一道关于数学的单项选择题,请你一步一步推理并得到最终的答案选项。回答格式为如下:\n答案选项A、B、C、D中你认为正确的一个选项\n计算过程:根据题目得到选项答案的一步步过程\n请严格按照上面的格式回答问题,下面是你要回答的题目:\n{question}\n答案选项:",
"single_choice_cn": "以下是一道关于数学的单项选择题,请你给出正确的答案选项\n下面是你要回答的题目:\n{question}\n答案选项:",
"single_choice_cn": "以下是一道关于数学的单项选择题,请你直接回答正确答案的选项序号\n下面是你要回答的题目:\n{question}\n答案选项:",
"single_choice_en_with_reasoning": "Here is a multiple-choice question about mathematics. Please provide the final answer option by step-by-step reasoning. Please answer in the following format:\nAnswer option: A, B, C, or D (the option you believe is correct)\nCalculation process: Step-by-step process to derive the answer option based on the question\nPlease strictly follow the above format to answer the question. Here is the question you need to answer:\n{question}\nAnswer option:",
"single_choice_en": "Here is a multiple-choice question about mathematics. Please provide the correct answer option directly.\nHere is the question you need to answer:\n{question}\nAnswer option:",
}
cloze_prompts={
cloze_prompts = {
"cloze_cn": [
dict(role='HUMAN', prompt='Q: 林中有15棵树。林务工人员今天将在林中种植树木。完成后将有21棵树。林务工人员今天种植了多少棵树'),
dict(role='BOT', prompt='A: 我们从15棵树开始。后来有21棵树。差值必定是他们种植的树木数量。所以他们必须种植了21 - 15 = 6棵树。答案是 6\n'),
@@ -53,15 +53,13 @@ cloze_prompts={
dict(role='BOT', prompt='A: She bought 5 bagels for $3 each. This means she spent 5 * $3 = $15 on the bagels. She had $23 in beginning, so now she has $23 - $15 = $8. The answer is 8.\n'),
dict(role='HUMAN', prompt='Q: {question}'),
dict(role='BOT', prompt='A: {answer}\n'),
],
}
]}
mathbench_sets = {
'college': ['single_choice_cn', 'cloze_en'],
'high': ['single_choice_cn', 'single_choice_en'],
'middle': ['single_choice_cn'],
'primary': ['cloze_cn'],
'primary': ['cloze_cn']
}
# Generate reasoning path if set True or just generate the final answer
@@ -75,10 +73,9 @@ mathbench_datasets = []
for _split in list(mathbench_sets.keys()):
for _name in mathbench_sets[_split]:
mathbench_infer_cfg = dict(
ice_template=dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin="</E>",
round=[
dict(
role="HUMAN",
@@ -86,15 +83,14 @@ for _split in list(mathbench_sets.keys()):
),
dict(role="BOT", prompt="{answer}")] if 'choice' in _name else cloze_prompts[_name],
),
ice_token="</E>",
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512,),
inferencer=dict(type=GenInferencer, max_out_len=512),
)
mathbench_eval_cfg = dict(
evaluator=dict(type=CircularEvaluator if 'choice' in _name else AccEvaluator),
pred_postprocessor=dict(type=first_capital_postprocess) if 'single_choice' in _name else dict(type=mathbench_postprocess, name=_name))
pred_postprocessor=dict(type=first_option_postprocess, options='ABCD') if 'single_choice' in _name else dict(type=mathbench_postprocess, name=_name))
mathbench_datasets.append(
dict(
@@ -110,5 +106,3 @@ for _split in list(mathbench_sets.keys()):
infer_cfg=mathbench_infer_cfg,
eval_cfg=mathbench_eval_cfg,
))
del _split, _name
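
Note on the evaluation change in this file: single-choice predictions are now routed through first_option_postprocess with options='ABCD' instead of first_capital_postprocess, so the extracted answer is the first recognizable option letter in the reply rather than the first capital letter anywhere in it. A minimal sketch of the intended behaviour, with illustrative outputs inferred from the patterns added in opencompass/utils/text_postprocessors.py further down in this diff:

from opencompass.utils.text_postprocessors import first_option_postprocess

# Illustrative calls; the expected extractions assume the patterns added in this PR.
first_option_postprocess('The correct answer option is C, because ...', 'ABCD')  # expected: 'C'
first_option_postprocess('答案选项为B,理由如下:...', 'ABCD')                      # expected: 'B'
first_option_postprocess('选项D正确。', 'ABCD')                                   # expected: 'D'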

View File

@@ -0,0 +1,18 @@
summarizer = dict(
dataset_abbrs=[
'######## MathBench Accuracy ########', # category
['mathbench-college-single_choice_cn', 'acc_1'],
['mathbench-college-cloze_en', 'accuracy'],
['mathbench-high-single_choice_cn', 'acc_1'],
['mathbench-high-single_choice_en', 'acc_1'],
['mathbench-middle-single_choice_cn', 'acc_1'],
['mathbench-primary-cloze_cn', 'accuracy'],
'######## MathBench CircularEval ########', # category
['mathbench-college-single_choice_cn', 'perf_4'],
['mathbench-high-single_choice_cn', 'perf_4'],
['mathbench-high-single_choice_en', 'perf_4'],
['mathbench-middle-single_choice_cn', 'perf_4'],
],
summary_groups=sum(
[v for k, v in locals().items() if k.endswith("_summary_groups")], [])
)
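
For readers of the new summarizer: as I read CircularEvaluator's outputs, 'acc_1' is accuracy with the options in their original order, 'perf_4' only credits a question when all four circular rotations of its options are answered correctly, and 'accuracy' is the plain AccEvaluator metric for the cloze subsets. A small illustrative sketch of that scoring rule (not CircularEvaluator's actual implementation):

from typing import Dict, List

def circular_scores(correct_by_rotation: Dict[str, List[bool]]) -> Dict[str, float]:
    # Maps each question id to four booleans, one per option rotation,
    # with index 0 being the original ABCD order.
    n = len(correct_by_rotation)
    acc_1 = 100.0 * sum(flags[0] for flags in correct_by_rotation.values()) / n
    perf_4 = 100.0 * sum(all(flags) for flags in correct_by_rotation.values()) / n
    return {'acc_1': acc_1, 'perf_4': perf_4}

circular_scores({'q1': [True, True, True, True], 'q2': [True, False, True, True]})
# -> {'acc_1': 100.0, 'perf_4': 50.0}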

View File

@@ -71,10 +71,15 @@ class MathBenchDataset(BaseDataset):
else:
question = entry['question'].strip(
) + '\n' + get_number(entry['options'])
data.append({
info = {
'question': question,
'answer': entry['answer'].strip()
})
}
# For PPL evaluation
for i in range(4):
info[chr(ord('A') +
i)] = entry['options'][i].strip()
data.append(info)
dataset = Dataset.from_list(data)
return dataset
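
The "# For PPL evaluation" block above stores each option's text under the keys A/B/C/D. No PPL config is added in this PR, but those fields would allow one in the usual opencompass style, roughly like the hypothetical sketch below:

# Hypothetical PPL-style config enabled by the new A/B/C/D fields (not part of this PR).
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer

mathbench_ppl_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template={
            answer: dict(round=[
                dict(role='HUMAN', prompt='{question}\n答案:'),
                # Score the text of this candidate option; {A}..{D} are the new fields.
                dict(role='BOT', prompt=f'{{{answer}}}'),
            ])
            for answer in ['A', 'B', 'C', 'D']
        },
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=PPLInferencer),
)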
@@ -91,7 +96,7 @@ def mathbench_postprocess(text: str, name: str) -> str:
ans = ans_line[1].strip()
output = re.sub(r'(\d),(\d)', r'\1\2', ans)
numbers = re.findall(r'-?\d*\.?\d+|\d+', output)
numbers = re.findall(r'-?\d*\.?/?\d+|\d+', output)
if numbers:
return numbers[-1]
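
The widened pattern in mathbench_postprocess (a '/?' added before the final '\d+') appears intended to keep simple fractions such as 3/4 as a single answer token instead of splitting them; a quick check under that reading:

import re

text = 'So the answer is 3/4'
old = re.findall(r'-?\d*\.?\d+|\d+', text)    # ['3', '4'] -> last number is '4'
new = re.findall(r'-?\d*\.?/?\d+|\d+', text)  # ['3/4']    -> last number is '3/4'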

View File

@@ -53,9 +53,12 @@ def first_option_postprocess(text: str, options: str) -> str:
patterns = [
f'[Tt]he answer is [{options}]',
f'[Tt]he correct answer is [{options}]',
f'答案是(.*?)[{options}]',
f'答案为(.*?)[{options}]',
f'[Tt]he correct answer\s?(?:option)?\s?is [{options}]', # noqa
f'答案(?:选项)?是(.*?)[{options}]',
f'答案(?:选项)?为(.*?)[{options}]',
f'答案(?:选项)?选(.*?)[{options}]',
f'选项[{options}]是?正确',
f'选项[{options}]为?正确',
f'固选(.*?)[{options}]',
f'答案应该是(.*?)[{options}]',
f'(\s|^)[{options}][\s。,\.$]', # noqa