[Update] Add 0shot CoT config for TheoremQA (#1783)

This commit is contained in:
Linchen Xiao 2024-12-27 16:17:27 +08:00 committed by GitHub
parent 357ce8c7a4
commit 42b54d6bb8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 65 additions and 1 deletions

View File

@ -0,0 +1,57 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import (
TheoremQADataset,
TheoremQA_postprocess_v3,
TheoremQA_postprocess_v4,
TheoremQAEvaluatorV3,
)
# Reader settings: the columns substituted into the prompt, the column that
# holds the reference answer, and the split name used as the train split.
TheoremQA_reader_cfg = {
    'input_columns': ['Question', 'Answer_type'],
    'output_column': 'Answer',
    'train_split': 'test',
}
# Instruction preamble: tells the model how to phrase its final sentence and
# enumerates the answer forms it is allowed to produce.
TheoremQA_prompt1 = (
    'You are a mathematician, you are supposed to answer the given question. '
    'You need to output the answer in your final sentence like '
    '"Therefore, the answer is ...". '
    'The answer can only be one of the following forms:\n'
    '1. a numerical value like 0.1, no symbol and no unit at all.\n'
    '2. a list of number like [2, 3, 4].\n'
    '3. True/False.\n'
    '4. an option like (a), (b), (c), (d)\n'
)
# Per-sample part: the `{Question}` placeholder followed by the zero-shot
# chain-of-thought trigger sentence.
TheoremQA_prompt2 = 'Question: {Question}\n' "Let's think step by step."
# Inference settings: a single HUMAN turn made of the instruction preamble
# followed by the question, no in-context examples (ZeroRetriever), and
# generation-based inference (GenInferencer).
TheoremQA_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {
                    'role': 'HUMAN',
                    'prompt': TheoremQA_prompt1 + TheoremQA_prompt2,
                },
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer},
}
# A fully correct evaluator would need an LLM to extract the answer; the
# evaluation logic used here will also produce a fair number of false
# negatives.
TheoremQA_eval_cfg = {
    'evaluator': {'type': TheoremQAEvaluatorV3},
    'pred_postprocessor': {'type': TheoremQA_postprocess_v4},
}
# Dataset list exposed by this config: one TheoremQA entry that ties the
# reader, inference and evaluation settings above to the test CSV.
TheoremQA_datasets = [
    {
        'abbr': 'TheoremQA',
        'type': TheoremQADataset,
        'path': './data/TheoremQA/test.csv',
        'reader_cfg': TheoremQA_reader_cfg,
        'infer_cfg': TheoremQA_infer_cfg,
        'eval_cfg': TheoremQA_eval_cfg,
    },
]

View File

@ -1,4 +1,4 @@
from .legacy import (TheoremQA_postprocess, TheoremQA_postprocess_v2,
TheoremQADataset)
from .main import (TheoremQA_postprocess_v3, TheoremQADatasetV3,
TheoremQAEvaluatorV3)
TheoremQAEvaluatorV3, TheoremQA_postprocess_v4)

View File

@ -30,6 +30,13 @@ def TheoremQA_postprocess_v3(text: str) -> str:
answer = utils.answer_clean(["The answer is:", "The answer is", "the answer is"], text)
return answer
def TheoremQA_postprocess_v4(text: str) -> str:
    r"""Extract the answer from a model response and unwrap inline LaTeX.

    The response is first passed to ``utils.answer_clean`` together with the
    answer-introducing phrases listed below. Surrounding whitespace is then
    trimmed, and one leading ``\(`` and one trailing ``\)`` delimiter are
    removed if present.

    Args:
        text: Raw model output.

    Returns:
        The cleaned answer without the inline-math delimiters and without
        surrounding whitespace.
    """
    answer = utils.answer_clean(["The answer is:", "The answer is", "the answer is"], text)
    # Trim whitespace first so that delimiters padded by spaces are still
    # recognised as a prefix/suffix.
    answer = answer.strip()
    # NOTE: ``str.strip(chars)`` interprets ``chars`` as a *set* of characters,
    # so ``strip('\\(')`` would also eat the parentheses of an option such as
    # "(a)" or the leading backslash of "\frac{1}{2}". Match the literal
    # two-character delimiters instead.
    if answer.startswith('\\('):
        answer = answer[2:]
    if answer.endswith('\\)'):
        answer = answer[:-2]
    return answer.strip()
@ICL_EVALUATORS.register_module()
class TheoremQAEvaluatorV3(BaseEvaluator):