From 7d2d663ae354dd866f2d1533c4762848af04faf7 Mon Sep 17 00:00:00 2001 From: Myhs-phz Date: Mon, 14 Apr 2025 11:35:42 +0000 Subject: [PATCH] fix --- .../datasets/ClimaQA/ClimaQA_Gold_llm_judge_gen_f15343.py | 2 +- .../datasets/ClimaQA/ClimaQA_Silver_llm_judge_gen_f15343.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/opencompass/configs/datasets/ClimaQA/ClimaQA_Gold_llm_judge_gen_f15343.py b/opencompass/configs/datasets/ClimaQA/ClimaQA_Gold_llm_judge_gen_f15343.py index 45b01f96..1ec9283d 100644 --- a/opencompass/configs/datasets/ClimaQA/ClimaQA_Gold_llm_judge_gen_f15343.py +++ b/opencompass/configs/datasets/ClimaQA/ClimaQA_Gold_llm_judge_gen_f15343.py @@ -17,7 +17,7 @@ GRADER_TEMPLATE_mcq = """ Here are some evaluation criteria: 1. Please refer to the given standard answer. You don't need to re-generate the answer to the question because the standard answer has been given. You only need to judge whether the candidate's answer is consistent with the standard answer according to the form of the question. Don't try to answer the original question. You can assume that the standard answer is definitely correct. 2. The answer may be one of the four options: a, b, c, or d. Only when the options given by prediction are strictly consistent with the answer, the prediction can be considered correct. - 5. If the prediction is given with 'The answer is:', please ignore the 'The answer is:', and only judge whether the candidate's answer is consistent with the standard answer. + 3. If the prediction is given with 'The answer is:', please ignore the 'The answer is:', and only judge whether the candidate's answer is consistent with the standard answer. Please judge whether the following answers are consistent with the standard answer based on the above criteria. Grade the predicted answer of this new question as one of: diff --git a/opencompass/configs/datasets/ClimaQA/ClimaQA_Silver_llm_judge_gen_f15343.py b/opencompass/configs/datasets/ClimaQA/ClimaQA_Silver_llm_judge_gen_f15343.py index b13ba38f..afe64d9d 100644 --- a/opencompass/configs/datasets/ClimaQA/ClimaQA_Silver_llm_judge_gen_f15343.py +++ b/opencompass/configs/datasets/ClimaQA/ClimaQA_Silver_llm_judge_gen_f15343.py @@ -17,7 +17,7 @@ GRADER_TEMPLATE_mcq = """ Here are some evaluation criteria: 1. Please refer to the given standard answer. You don't need to re-generate the answer to the question because the standard answer has been given. You only need to judge whether the candidate's answer is consistent with the standard answer according to the form of the question. Don't try to answer the original question. You can assume that the standard answer is definitely correct. 2. The answer may be one of the four options: a, b, c, or d. Only when the options given by prediction are strictly consistent with the answer, the prediction can be considered correct. - 5. If the prediction is given with 'The answer is:', please ignore the 'The answer is:', and only judge whether the candidate's answer is consistent with the standard answer. + 3. If the prediction is given with 'The answer is:', please ignore the 'The answer is:', and only judge whether the candidate's answer is consistent with the standard answer. Please judge whether the following answers are consistent with the standard answer based on the above criteria. Grade the predicted answer of this new question as one of: