From effddff840bad7a08763354ed5fba05e1e5349c6 Mon Sep 17 00:00:00 2001 From: zhangsongyang Date: Thu, 27 Feb 2025 04:47:50 +0000 Subject: [PATCH] Update --- ...py => livemathbench_hard_custom_llmverify_gen_85d0ef.py} | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) rename opencompass/configs/datasets/livemathbench/{livemathbench_hard_custom_llmverify_gen_9e7505.py => livemathbench_hard_custom_llmverify_gen_85d0ef.py} (95%) diff --git a/opencompass/configs/datasets/livemathbench/livemathbench_hard_custom_llmverify_gen_9e7505.py b/opencompass/configs/datasets/livemathbench/livemathbench_hard_custom_llmverify_gen_85d0ef.py similarity index 95% rename from opencompass/configs/datasets/livemathbench/livemathbench_hard_custom_llmverify_gen_9e7505.py rename to opencompass/configs/datasets/livemathbench/livemathbench_hard_custom_llmverify_gen_85d0ef.py index cc787456..62d6b1bc 100644 --- a/opencompass/configs/datasets/livemathbench/livemathbench_hard_custom_llmverify_gen_9e7505.py +++ b/opencompass/configs/datasets/livemathbench/livemathbench_hard_custom_llmverify_gen_85d0ef.py @@ -9,7 +9,7 @@ from itertools import product # from opencompass.datasets.livemathbench import LiveMathBenchDataset, LiveMathBenchEvaluator -livemathbench_reader_cfg = dict(input_columns=['prompt'], output_column='answer') +livemathbench_reader_cfg = dict(input_columns=['question'], output_column='answer') # Inference configuration @@ -20,7 +20,7 @@ livemathbench_infer_cfg = dict( round=[ dict( role='HUMAN', - prompt='{prompt}\nRemember to put your final answer within \\boxed{}.', + prompt='{question}\n', ), ] ), @@ -45,7 +45,7 @@ GRADER_TEMPLATE = """ B: INCORRECT Just return the letters "A" or "B", with no text around it. Here is your task. Simply reply with either CORRECT, INCORRECT. Don't apologize or correct yourself if there was a mistake; we are just trying to grade the answer. - : \n{prompt}\n\n\n + : \n{question}\n\n\n : \n{answer}\n\n\n : \n{prediction}\n\n\n