mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Feature] Add dingo test (#1529)
* add qa dingo * update * change name qa to dingo * eval model: llm_base * update path * change name and move path * add eval_dingo * update import * add for pip * add dingo package * change import place * update import place * fix lint fail * isort * double quoted --------- Co-authored-by: sj <shijin@pjlab.org.cn>
This commit is contained in:
parent
85a28874aa
commit
7528b8ab8a
34
configs/datasets/dingo/dingo_gen.py
Normal file
34
configs/datasets/dingo/dingo_gen.py
Normal file
@ -0,0 +1,34 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import DingoDataset, DingoEvaluator
|
||||
|
||||
|
||||
# Dingo evaluation configs: one generation-style dataset per CSV shard.
dingo_paths = [
    './data/dingo/en_192.csv',
    './data/dingo/zh_170.csv',
]

dingo_datasets = []
for data_path in dingo_paths:
    # The CSV supplies prompts only; dingo judges the raw generations,
    # so there is no reference column.
    dingo_reader_cfg = dict(input_columns='input', output_column=None)
    dingo_infer_cfg = dict(
        prompt_template=dict(
            type=PromptTemplate,
            template=dict(round=[dict(role='HUMAN', prompt='{input}')])),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=GenInferencer),
    )
    dingo_eval_cfg = dict(evaluator=dict(type=DingoEvaluator), pred_role='BOT')

    # e.g. './data/dingo/en_192.csv' -> 'en_192'
    file_stem = data_path.split('/')[-1].split('.csv')[0]
    dingo_datasets.append(
        dict(
            abbr=f'dingo_{file_stem}',
            type=DingoDataset,
            path=data_path,
            reader_cfg=dingo_reader_cfg,
            infer_cfg=dingo_infer_cfg,
            eval_cfg=dingo_eval_cfg,
        ))

datasets = dingo_datasets
|
7
configs/eval_dingo.py
Normal file
7
configs/eval_dingo.py
Normal file
@ -0,0 +1,7 @@
|
||||
# Entry config: evaluate a model on the dingo quality-inspection datasets.
from mmengine.config import read_base

with read_base():
    # Model under test: InternLM-7B loaded via HuggingFace.
    from .models.hf_internlm.hf_internlm_7b import models
    # The dingo generation datasets (en_192 / zh_170 CSV shards).
    from .datasets.dingo.dingo_gen import datasets

# All prediction and evaluation artifacts are written under this directory.
work_dir = './outputs/eval_dingo'
|
34
opencompass/configs/datasets/dingo/dingo_gen.py
Normal file
34
opencompass/configs/datasets/dingo/dingo_gen.py
Normal file
@ -0,0 +1,34 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import DingoDataset, DingoEvaluator
|
||||
|
||||
|
||||
# Dingo evaluation configs: one generation-style dataset per CSV shard.
dingo_paths = [
    './data/dingo/en_192.csv',
    './data/dingo/zh_170.csv',
]

dingo_datasets = []
for data_path in dingo_paths:
    # The CSV supplies prompts only; dingo judges the raw generations,
    # so there is no reference column.
    dingo_reader_cfg = dict(input_columns='input', output_column=None)
    dingo_infer_cfg = dict(
        prompt_template=dict(
            type=PromptTemplate,
            template=dict(round=[dict(role='HUMAN', prompt='{input}')])),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=GenInferencer),
    )
    dingo_eval_cfg = dict(evaluator=dict(type=DingoEvaluator), pred_role='BOT')

    # e.g. './data/dingo/en_192.csv' -> 'en_192'
    file_stem = data_path.split('/')[-1].split('.csv')[0]
    dingo_datasets.append(
        dict(
            abbr=f'dingo_{file_stem}',
            type=DingoDataset,
            path=data_path,
            reader_cfg=dingo_reader_cfg,
            infer_cfg=dingo_infer_cfg,
            eval_cfg=dingo_eval_cfg,
        ))

datasets = dingo_datasets
|
@ -33,6 +33,7 @@ from .crowspairs_cn import * # noqa: F401, F403
|
||||
from .csl import * # noqa: F401, F403
|
||||
from .custom import * # noqa: F401, F403
|
||||
from .cvalues import * # noqa: F401, F403
|
||||
from .dingo import * # noqa: F401, F403
|
||||
from .drcd import * # noqa: F401, F403
|
||||
from .drop import * # noqa: F401, F403
|
||||
from .drop_simple_eval import * # noqa: F401, F403
|
||||
|
84
opencompass/datasets/dingo.py
Normal file
84
opencompass/datasets/dingo.py
Normal file
@ -0,0 +1,84 @@
|
||||
# flake8: noqa
|
||||
# yapf: disable
|
||||
import csv
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from typing import List
|
||||
|
||||
from datasets import Dataset
|
||||
|
||||
from opencompass.openicl.icl_evaluator import BaseEvaluator
|
||||
from opencompass.registry import ICL_EVALUATORS, LOAD_DATASET
|
||||
|
||||
from .base import BaseDataset
|
||||
|
||||
|
||||
@LOAD_DATASET.register_module()
class DingoDataset(BaseDataset):
    """Prompt-only dataset read from a ';'-delimited CSV file."""

    @staticmethod
    def load(path: str):
        """Build a Dataset with one 'input' prompt per CSV row.

        Rows with no cells (blank lines) become empty prompts.
        """
        with open(path, encoding='utf-8') as f:
            rows = [{'input': row[0] if row else ''}
                    for row in csv.reader(f, delimiter=';')]
        return Dataset.from_list(rows)
|
||||
|
||||
|
||||
@LOAD_DATASET.register_module()
class DingoLongDataset(BaseDataset):
    """Prompt-only dataset read from a JSONL file (one object per line)."""

    @staticmethod
    def load(path: str):
        """Build a Dataset from the 'input' field of every JSON line.

        Blank lines (e.g. a trailing newline or padding between records)
        are skipped instead of raising ``json.JSONDecodeError``.
        """
        raw_data = []
        with open(path, 'r', encoding='utf-8') as f:
            for line in f:
                if not line.strip():
                    continue  # tolerate blank/padding lines in the JSONL
                raw_data.append({'input': json.loads(line).get('input')})
        return Dataset.from_list(raw_data)
|
||||
|
||||
|
||||
@ICL_EVALUATORS.register_module()
class DingoEvaluator(BaseEvaluator):
    """Score generations with the external ``dingo-python`` quality tool.

    Writes prompt/prediction pairs to a temporary JSONL hand-off file,
    runs dingo's local executor over it, and returns the first result
    summary as a dict.
    """

    def score(self, origin_prompt: List, predictions: List) -> dict:
        """Run dingo's 'llm_base' rule set over the predictions.

        Raises:
            ModuleNotFoundError: if the ``dingo-python`` package is not
                installed.
        """
        try:
            # from dingo.model.model import Model
            from dingo.exec import Executor
            from dingo.io import InputArgs
        except Exception as err:
            # Chain the original import failure so it is not lost.
            raise ModuleNotFoundError(
                '=========== '
                'dingo register fail. please try: pip install dingo-python.'
                ' ===========') from err

        current_time = time.strftime('%Y%m%d_%H%M%S', time.localtime())
        file_data = [{'prompt': pmt, 'prediction': prd}
                     for pmt, prd in zip(origin_prompt, predictions)]
        file_name = 'dingo_file_' + current_time + '.jsonl'
        # 'w' (not 'a'): a rerun within the same second must overwrite,
        # not append to stale rows from a previous run.
        with open(file_name, 'w', encoding='utf-8') as f:
            for d in file_data:
                json.dump(d, f, ensure_ascii=False)
                f.write('\n')

        input_data = {
            'eval_models': ['llm_base'],
            'input_path': file_name,
            'output_path': './outputs/dingo/',
            'dataset': 'local',
            'datasource': 'local',
            'data_format': 'jsonl',
            'column_prompt': ['prompt'],
            'column_content': ['prediction'],
        }
        try:
            # Model.apply_config(input_data["custom_config_path"])
            input_args = InputArgs(**input_data)
            executor = Executor.exec_map['local'](input_args)
            result = executor.execute()
            summary = result[0].to_dict()
        finally:
            # Always clean up the hand-off file, even when dingo fails.
            os.remove(file_name)
        return summary
|
@ -1,6 +1,7 @@
|
||||
# Alpaca-eval
|
||||
alpaca-eval==0.6
|
||||
cn2an
|
||||
dingo-python
|
||||
# Icl topk retriever
|
||||
faiss_gpu==1.7.2
|
||||
# Humaneval, Humaneval X
|
||||
|
Loading…
Reference in New Issue
Block a user