mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
add writingbench
This commit is contained in:
parent
b93afe7764
commit
e6f4276412
@ -42,7 +42,7 @@ for _name in subjective_all_sets:
|
||||
dict(
|
||||
role='SYSTEM',
|
||||
fallback_role='HUMAN',
|
||||
prompt="You are an expert evaluator with extensive experience in evaluating response of given query.")
|
||||
prompt='You are an expert evaluator with extensive experience in evaluating response of given query.')
|
||||
],
|
||||
round=[
|
||||
dict(
|
||||
|
@ -12,7 +12,6 @@ from opencompass.utils import get_data_path
|
||||
from ..base import BaseDataset
|
||||
from .utils import get_judgeanswer_and_reference
|
||||
|
||||
|
||||
base_prompt = """Evaluate the Response based on the Query and criteria provided.
|
||||
|
||||
** Criteria **
|
||||
@ -43,6 +42,7 @@ Return the results in the following JSON format, Only output this JSON format an
|
||||
```
|
||||
"""
|
||||
|
||||
|
||||
@LOAD_DATASET.register_module()
|
||||
class WritingBenchDataset(BaseDataset):
|
||||
|
||||
@ -59,7 +59,9 @@ class WritingBenchDataset(BaseDataset):
|
||||
criteria = data['criteria']
|
||||
judge_prompt_list = []
|
||||
for criteria_item in criteria:
|
||||
temp_prompt = base_prompt.format(question=query, criteria=criteria_item, prediction='{prediction}')
|
||||
temp_prompt = base_prompt.format(question=query,
|
||||
criteria=criteria_item,
|
||||
prediction='{prediction}')
|
||||
judge_prompt_list.append(temp_prompt)
|
||||
idx = data['index']
|
||||
raw_data.append({
|
||||
|
Loading…
Reference in New Issue
Block a user