mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
add writingbench
This commit is contained in:
parent
b93afe7764
commit
e6f4276412
@ -42,7 +42,7 @@ for _name in subjective_all_sets:
|
|||||||
dict(
|
dict(
|
||||||
role='SYSTEM',
|
role='SYSTEM',
|
||||||
fallback_role='HUMAN',
|
fallback_role='HUMAN',
|
||||||
prompt="You are an expert evaluator with extensive experience in evaluating response of given query.")
|
prompt='You are an expert evaluator with extensive experience in evaluating response of given query.')
|
||||||
],
|
],
|
||||||
round=[
|
round=[
|
||||||
dict(
|
dict(
|
||||||
|
@ -12,7 +12,6 @@ from opencompass.utils import get_data_path
|
|||||||
from ..base import BaseDataset
|
from ..base import BaseDataset
|
||||||
from .utils import get_judgeanswer_and_reference
|
from .utils import get_judgeanswer_and_reference
|
||||||
|
|
||||||
|
|
||||||
base_prompt = """Evaluate the Response based on the Query and criteria provided.
|
base_prompt = """Evaluate the Response based on the Query and criteria provided.
|
||||||
|
|
||||||
** Criteria **
|
** Criteria **
|
||||||
@ -43,6 +42,7 @@ Return the results in the following JSON format, Only output this JSON format an
|
|||||||
```
|
```
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
@LOAD_DATASET.register_module()
|
@LOAD_DATASET.register_module()
|
||||||
class WritingBenchDataset(BaseDataset):
|
class WritingBenchDataset(BaseDataset):
|
||||||
|
|
||||||
@ -59,7 +59,9 @@ class WritingBenchDataset(BaseDataset):
|
|||||||
criteria = data['criteria']
|
criteria = data['criteria']
|
||||||
judge_prompt_list = []
|
judge_prompt_list = []
|
||||||
for criteria_item in criteria:
|
for criteria_item in criteria:
|
||||||
temp_prompt = base_prompt.format(question=query, criteria=criteria_item, prediction='{prediction}')
|
temp_prompt = base_prompt.format(question=query,
|
||||||
|
criteria=criteria_item,
|
||||||
|
prediction='{prediction}')
|
||||||
judge_prompt_list.append(temp_prompt)
|
judge_prompt_list.append(temp_prompt)
|
||||||
idx = data['index']
|
idx = data['index']
|
||||||
raw_data.append({
|
raw_data.append({
|
||||||
|
Loading…
Reference in New Issue
Block a user