From e6f4276412eb17fa258ea6035b67809f3f6d6ed7 Mon Sep 17 00:00:00 2001 From: bittersweet1999 <1487910649@qq.com> Date: Fri, 18 Apr 2025 10:42:29 +0000 Subject: [PATCH] add writingbench --- .../datasets/subjective/writingbench/writingbench_judge.py | 2 +- opencompass/datasets/subjective/writingbench.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/opencompass/configs/datasets/subjective/writingbench/writingbench_judge.py b/opencompass/configs/datasets/subjective/writingbench/writingbench_judge.py index 64e46172..fb61c35c 100644 --- a/opencompass/configs/datasets/subjective/writingbench/writingbench_judge.py +++ b/opencompass/configs/datasets/subjective/writingbench/writingbench_judge.py @@ -42,7 +42,7 @@ for _name in subjective_all_sets: dict( role='SYSTEM', fallback_role='HUMAN', - prompt="You are an expert evaluator with extensive experience in evaluating response of given query.") + prompt='You are an expert evaluator with extensive experience in evaluating response of given query.') ], round=[ dict( diff --git a/opencompass/datasets/subjective/writingbench.py b/opencompass/datasets/subjective/writingbench.py index fb6b0a40..312dd58e 100644 --- a/opencompass/datasets/subjective/writingbench.py +++ b/opencompass/datasets/subjective/writingbench.py @@ -12,7 +12,6 @@ from opencompass.utils import get_data_path from ..base import BaseDataset from .utils import get_judgeanswer_and_reference - base_prompt = """Evaluate the Response based on the Query and criteria provided. ** Criteria ** @@ -43,6 +42,7 @@ Return the results in the following JSON format, Only output this JSON format an ``` """ + @LOAD_DATASET.register_module() class WritingBenchDataset(BaseDataset): @@ -59,7 +59,9 @@ class WritingBenchDataset(BaseDataset): criteria = data['criteria'] judge_prompt_list = [] for criteria_item in criteria: - temp_prompt = base_prompt.format(question=query, criteria=criteria_item, prediction='{prediction}') + temp_prompt = base_prompt.format(question=query, + criteria=criteria_item, + prediction='{prediction}') judge_prompt_list.append(temp_prompt) idx = data['index'] raw_data.append({