OpenCompass/configs/datasets/math/math_llm_judge.py
liushz a6f67e1a65
[Fix] Fix Math Evaluation with Judge Model Evaluator & Add README (#1103)
* Add Math Evaluation with Judge Model Evaluator

* Add Math Evaluation with Judge Model Evaluator

* Add Math Evaluation with Judge Model Evaluator

* Add Math Evaluation with Judge Model Evaluator

* Fix Llama-3 meta template

* Fix MATH with JudgeLM Evaluation

* Fix MATH with JudgeLM Evaluation

* Fix MATH with JudgeLM Evaluation

* Fix MATH with JudgeLM Evaluation

---------

Co-authored-by: liuhongwei <liuhongwei@pjlab.org.cn>
2024-04-28 21:58:58 +08:00

35 lines
1.2 KiB
Python

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import MATHDataset, MATHEvaluator, math_postprocess
# Zero-shot user prompt for a MATH problem. It asks for a step-by-step
# solution whose last line has the form "ANSWER: $ANSWER" and states that a
# \boxed command is not needed.
# `{problem}` is a placeholder; presumably it is filled from the 'problem'
# input column by the prompt template -- TODO confirm.
QUERY_TEMPLATE = '\n'.join((
    'Solve the following math problem step by step. The last line of your response should be of the form ANSWER: $ANSWER (without quotes) where $ANSWER is the answer to the problem.',
    '{problem}',
    'Remember to put your answer on its own line after "ANSWER:", and you do not need to use a \\boxed command.',
))
# Reader settings: 'problem' is the only input column and 'solution' is the
# output column.
math_reader_cfg = {
    'input_columns': ['problem'],
    'output_column': 'solution',
}
# Inference settings: a single HUMAN turn carrying QUERY_TEMPLATE, the
# ZeroRetriever retriever, and generation through GenInferencer with
# max_out_len=1024.
# NOTE(review): ZeroRetriever presumably means no in-context examples are
# retrieved (zero-shot) -- confirm against the OpenCompass retriever docs.
math_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {'role': 'HUMAN', 'prompt': QUERY_TEMPLATE},
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer, 'max_out_len': 1024},
}
# Evaluation settings: predictions are post-processed with math_postprocess
# and evaluated with MATHEvaluator.
math_eval_cfg = {
    'evaluator': {'type': MATHEvaluator},
    'pred_postprocessor': {'type': math_postprocess},
}
# Dataset list exposed by this config: one MATHDataset entry, abbreviated
# 'math', read from ./data/math/math.json and wired to the reader, inference
# and evaluation settings defined in this file.
math_datasets = [
    {
        'type': MATHDataset,
        'abbr': 'math',
        'path': './data/math/math.json',
        'reader_cfg': math_reader_cfg,
        'infer_cfg': math_infer_cfg,
        'eval_cfg': math_eval_cfg,
    },
]