add writingbench

bittersweet1999 2025-04-18 10:42:29 +00:00
parent b93afe7764
commit e6f4276412
2 changed files with 5 additions and 3 deletions


@@ -42,7 +42,7 @@ for _name in subjective_all_sets:
         dict(
             role='SYSTEM',
             fallback_role='HUMAN',
-            prompt="You are an expert evaluator with extensive experience in evaluating response of given query.")
+            prompt='You are an expert evaluator with extensive experience in evaluating response of given query.')
     ],
     round=[
         dict(


@@ -12,7 +12,6 @@ from opencompass.utils import get_data_path
 from ..base import BaseDataset
 from .utils import get_judgeanswer_and_reference
-
 base_prompt = """Evaluate the Response based on the Query and criteria provided.

 ** Criteria **
@@ -43,6 +42,7 @@ Return the results in the following JSON format, Only output this JSON format an
 ```
 """

+
 @LOAD_DATASET.register_module()
 class WritingBenchDataset(BaseDataset):
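`@LOAD_DATASET.register_module()` is the registry pattern OpenCompass uses to map a config's `type` field to a dataset class. A minimal self-contained sketch of that pattern (a toy `Registry`, not OpenCompass's actual implementation):

```python
# Toy registry illustrating what @LOAD_DATASET.register_module() does.
# Hypothetical sketch; OpenCompass's real Registry has more features.
class Registry:
    def __init__(self, name):
        self.name = name
        self._modules = {}

    def register_module(self):
        def decorator(cls):
            self._modules[cls.__name__] = cls  # record class under its name
            return cls                         # the class itself is unchanged
        return decorator

    def build(self, cfg):
        cfg = dict(cfg)
        cls = self._modules[cfg.pop('type')]   # look up class by 'type'
        return cls(**cfg)                      # instantiate with the rest

LOAD_DATASET = Registry('load_dataset')

@LOAD_DATASET.register_module()
class WritingBenchDataset:
    def __init__(self, path=None):
        self.path = path

# A config like dict(type='WritingBenchDataset', path=...) can now be built:
ds = LOAD_DATASET.build(dict(type='WritingBenchDataset', path='data/writingbench'))
print(type(ds).__name__)  # WritingBenchDataset
```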
@@ -59,7 +59,9 @@ class WritingBenchDataset(BaseDataset):
             criteria = data['criteria']
             judge_prompt_list = []
             for criteria_item in criteria:
-                temp_prompt = base_prompt.format(question=query, criteria=criteria_item, prediction='{prediction}')
+                temp_prompt = base_prompt.format(question=query,
+                                                 criteria=criteria_item,
+                                                 prediction='{prediction}')
                 judge_prompt_list.append(temp_prompt)
             idx = data['index']
             raw_data.append({
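The reformatted `base_prompt.format(...)` call above is only a line-length cleanup, but the `prediction='{prediction}'` argument is worth noting: it passes the literal placeholder string back through `str.format`, so each per-criteria judge prompt remains a template that the evaluation stage can later fill with the model's actual response. A minimal sketch of that two-stage formatting (the template body here is abbreviated; only the `question`, `criteria`, and `prediction` field names come from the diff):

```python
# Abbreviated stand-in for the dataset module's base_prompt; the real one
# also embeds scoring instructions and a JSON output format.
base_prompt = """Evaluate the Response based on the Query and criteria provided.

** Criteria **
{criteria}

** Query **
{question}

** Response **
{prediction}
"""

query = 'Write a short product description for a mechanical keyboard.'
criteria = ['clarity', 'persuasiveness']  # hypothetical criteria items

judge_prompt_list = []
for criteria_item in criteria:
    # Stage 1: bake in the query and one criterion, but keep '{prediction}'
    # as a literal so the placeholder survives this .format() call.
    temp_prompt = base_prompt.format(question=query,
                                     criteria=criteria_item,
                                     prediction='{prediction}')
    judge_prompt_list.append(temp_prompt)

assert all('{prediction}' in p for p in judge_prompt_list)

# Stage 2: at evaluation time the placeholder is filled with the model output.
final_prompt = judge_prompt_list[0].format(prediction='A crisp, tactile board...')
print(final_prompt)
```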