Update

2025-05-30 16:03:24 +08:00 · 2025-02-27 04:47:50 +00:00 · 2025-02-27 04:47:50 +00:00 · effddff840
commit effddff840
parent ff621ddb39
1 changed file with 3 additions and 3 deletions
--- a/opencompass/configs/datasets/livemathbench/livemathbench_hard_custom_llmverify_gen_85d0ef.py
+++ b/opencompass/configs/datasets/livemathbench/livemathbench_hard_custom_llmverify_gen_85d0ef.py
@@ -9,7 +9,7 @@ from itertools import product
 # from opencompass.datasets.livemathbench import LiveMathBenchDataset, LiveMathBenchEvaluator


-livemathbench_reader_cfg = dict(input_columns=['prompt'], output_column='answer')
+livemathbench_reader_cfg = dict(input_columns=['question'], output_column='answer')


 # Inference configuration
@@ -20,7 +20,7 @@ livemathbench_infer_cfg = dict(
            round=[
                dict(
                    role='HUMAN',
-                    prompt='{prompt}\nRemember to put your final answer within \\boxed{}.',
+                    prompt='{question}\n',
                ),
            ]
        ),
@@ -45,7 +45,7 @@ GRADER_TEMPLATE = """
    B: INCORRECT
    Just return the letters "A" or "B", with no text around it.
    Here is your task. Simply reply with either CORRECT, INCORRECT. Don't apologize or correct yourself if there was a mistake; we are just trying to grade the answer.
-    <Original Question Begin>: \n{prompt}\n<Original Question End>\n\n
+    <Original Question Begin>: \n{question}\n<Original Question End>\n\n
    <Gold Target Begin>: \n{answer}\n<Gold Target End>\n\n
    <Predicted Answer Begin>: \n{prediction}\n<Predicted End>\n\n