mirror of https://github.com/open-compass/opencompass.git, synced 2025-05-30 16:03:24 +08:00

* add rewardbench * add rmb datasets * add judgebench
58 lines
2.0 KiB
Python
# flake8: noqa
import json
import os.path as osp
import re

import numpy as np
import pandas as pd
from datasets import Dataset

from opencompass.openicl.icl_evaluator import BaseEvaluator
from opencompass.registry import (DICT_POSTPROCESSORS, ICL_EVALUATORS,
                                  LOAD_DATASET)
from opencompass.utils import get_data_path

from ..base import BaseDataset


@LOAD_DATASET.register_module()
class JudgeBenchDataset(BaseDataset):

    def load(self, path: str, name: str, *args, **kwargs):
        path = get_data_path(path, local_mode=True)
        filename = osp.join(path, f'{name}')
        raw_data = []
        with open(filename, 'r', encoding='utf-8') as f:
            data = json.load(f)
            for item in data:
                # Each record pairs the preferred ('chosen') and
                # non-preferred ('rejected') responses with the models
                # that produced them.
                conversation_a = item['chosen']
                conversation_b = item['rejected']
                model_a = item['chosen_model']
                model_b = item['rejected_model']
                question = item['prompt']
                winner = item['winner']
                if winner == 'B':
                    # When the annotated winner is 'B', swap the pair so the
                    # answer positions match the recorded winner label.
                    conversation_a, conversation_b = conversation_b, conversation_a
                    model_a, model_b = model_b, model_a
                subset = item['subset']
                lan = 'en'
                raw_data.append({
                    'question': question,
                    'answerA': conversation_a,
                    'answerB': conversation_b,
                    'judge': {
                        'prompt': item['prompt'],
                        'Answer_A': conversation_a,
                        'Answer_B': conversation_b,
                        'subset': subset,
                        'winner': winner,
                        'model_a': model_a,
                        'model_b': model_b,
                        'dataset_name': 'rewardbench',
                        'lan': lan
                    }
                })
        dataset = Dataset.from_list(raw_data)
        return dataset
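For reference, load() treats the file at osp.join(path, name) as a JSON array whose items carry the fields read in the loop above. A minimal sketch of such a record follows; the prompt text, answers, model names, and file name are invented for illustration and are not taken from the real JudgeBench release.

    import json

    # Illustrative only: a single record shaped the way JudgeBenchDataset.load
    # expects. Keys mirror the fields the loader reads; values are placeholders.
    example_item = {
        'prompt': 'Explain why the sky appears blue.',
        'chosen': 'Shorter wavelengths scatter more strongly (Rayleigh scattering)...',
        'rejected': 'The sky reflects the colour of the ocean.',
        'chosen_model': 'model-a',    # hypothetical model identifiers
        'rejected_model': 'model-b',
        'winner': 'A',                # 'B' would make the loader swap the two answers
        'subset': 'knowledge',
    }

    # The file passed as (path, name) is a JSON array of such records.
    with open('example.json', 'w', encoding='utf-8') as f:
        json.dump([example_item], f, ensure_ascii=False, indent=2)

After loading, each record becomes one dataset row with 'question', 'answerA', 'answerB', and a nested 'judge' dict that preserves the subset, winner, and model names for evaluation.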