add writingbench

2025-05-30 16:03:24 +08:00 · 2025-04-18 10:42:29 +00:00 · 2025-04-18 10:42:29 +00:00 · e6f4276412
commit e6f4276412
parent b93afe7764
2 changed files with 5 additions and 3 deletions
--- a/opencompass/configs/datasets/subjective/writingbench/writingbench_judge.py
+++ b/opencompass/configs/datasets/subjective/writingbench/writingbench_judge.py
@@ -42,7 +42,7 @@ for _name in subjective_all_sets:
                    dict(
                        role='SYSTEM',
                        fallback_role='HUMAN',
-                        prompt="You are an expert evaluator with extensive experience in evaluating response of given query.")
+                        prompt='You are an expert evaluator with extensive experience in evaluating response of given query.')
                ],
                    round=[
                    dict(
--- a/opencompass/datasets/subjective/writingbench.py
+++ b/opencompass/datasets/subjective/writingbench.py
@@ -12,7 +12,6 @@ from opencompass.utils import get_data_path
 from ..base import BaseDataset
 from .utils import get_judgeanswer_and_reference
 base_prompt = """Evaluate the Response based on the Query and criteria provided.
 ** Criteria **
@@ -43,6 +42,7 @@ Return the results in the following JSON format, Only output this JSON format an
 ```
 """
 @LOAD_DATASET.register_module()
 class WritingBenchDataset(BaseDataset):
@@ -59,7 +59,9 @@ class WritingBenchDataset(BaseDataset):
                criteria = data['criteria']
                judge_prompt_list = []
                for criteria_item in criteria:
-                    temp_prompt = base_prompt.format(question=query, criteria=criteria_item, prediction='{prediction}')
+                    temp_prompt = base_prompt.format(question=query,
+                                                     criteria=criteria_item,
+                                                     prediction='{prediction}')
                    judge_prompt_list.append(temp_prompt)
                idx = data['index']
                raw_data.append({