diff --git a/configs/datasets/dingo/dingo_gen.py b/configs/datasets/dingo/dingo_gen.py
new file mode 100644
index 00000000..c36f6cdc
--- /dev/null
+++ b/configs/datasets/dingo/dingo_gen.py
@@ -0,0 +1,34 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import DingoDataset, DingoEvaluator
+
+
+dingo_paths = [
+    './data/dingo/en_192.csv',
+    './data/dingo/zh_170.csv',
+]
+
+dingo_datasets = []
+for path in dingo_paths:
+    dingo_reader_cfg = dict(input_columns='input', output_column=None)
+    dingo_infer_cfg = dict(
+        prompt_template=dict(
+            type=PromptTemplate,
+            template=dict(round=[dict(role='HUMAN', prompt='{input}')])),
+        retriever=dict(type=ZeroRetriever),
+        inferencer=dict(type=GenInferencer),
+    )
+    dingo_eval_cfg = dict(evaluator=dict(type=DingoEvaluator), pred_role='BOT')
+
+    dingo_datasets.append(
+        dict(
+            abbr='dingo_' + path.split('/')[-1].split('.csv')[0],
+            type=DingoDataset,
+            path=path,
+            reader_cfg=dingo_reader_cfg,
+            infer_cfg=dingo_infer_cfg,
+            eval_cfg=dingo_eval_cfg,
+        ))
+
+datasets = dingo_datasets
diff --git a/configs/eval_dingo.py b/configs/eval_dingo.py
new file mode 100644
index 00000000..3e0ecb86
--- /dev/null
+++ b/configs/eval_dingo.py
@@ -0,0 +1,7 @@
+from mmengine.config import read_base
+
+with read_base():
+    from .models.hf_internlm.hf_internlm_7b import models
+    from .datasets.dingo.dingo_gen import datasets
+
+work_dir = './outputs/eval_dingo'
diff --git a/opencompass/configs/datasets/dingo/dingo_gen.py b/opencompass/configs/datasets/dingo/dingo_gen.py
new file mode 100644
index 00000000..c36f6cdc
--- /dev/null
+++ b/opencompass/configs/datasets/dingo/dingo_gen.py
@@ -0,0 +1,34 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import DingoDataset, DingoEvaluator
+
+
+dingo_paths = [
+    './data/dingo/en_192.csv',
+    './data/dingo/zh_170.csv',
+]
+
+dingo_datasets = []
+for path in dingo_paths:
+    dingo_reader_cfg = dict(input_columns='input', output_column=None)
+    dingo_infer_cfg = dict(
+        prompt_template=dict(
+            type=PromptTemplate,
+            template=dict(round=[dict(role='HUMAN', prompt='{input}')])),
+        retriever=dict(type=ZeroRetriever),
+        inferencer=dict(type=GenInferencer),
+    )
+    dingo_eval_cfg = dict(evaluator=dict(type=DingoEvaluator), pred_role='BOT')
+
+    dingo_datasets.append(
+        dict(
+            abbr='dingo_' + path.split('/')[-1].split('.csv')[0],
+            type=DingoDataset,
+            path=path,
+            reader_cfg=dingo_reader_cfg,
+            infer_cfg=dingo_infer_cfg,
+            eval_cfg=dingo_eval_cfg,
+        ))
+
+datasets = dingo_datasets
diff --git a/opencompass/datasets/__init__.py b/opencompass/datasets/__init__.py
index a1f201ef..8f178242 100644
--- a/opencompass/datasets/__init__.py
+++ b/opencompass/datasets/__init__.py
@@ -33,6 +33,7 @@ from .crowspairs_cn import *  # noqa: F401, F403
 from .csl import *  # noqa: F401, F403
 from .custom import *  # noqa: F401, F403
 from .cvalues import *  # noqa: F401, F403
+from .dingo import *  # noqa: F401, F403
 from .drcd import *  # noqa: F401, F403
 from .drop import *  # noqa: F401, F403
 from .drop_simple_eval import *  # noqa: F401, F403
diff --git a/opencompass/datasets/dingo.py b/opencompass/datasets/dingo.py
new file mode 100644
index 00000000..753d78dd
--- /dev/null
+++ b/opencompass/datasets/dingo.py
@@ -0,0 +1,84 @@
+# flake8: noqa
+# yapf: disable
+import csv
+import json
+import os
+import time
+from typing import List
+
+from datasets import Dataset
+
+from opencompass.openicl.icl_evaluator import BaseEvaluator
+from opencompass.registry import ICL_EVALUATORS, LOAD_DATASET
+
+from .base import BaseDataset
+
+
+@LOAD_DATASET.register_module()
+class DingoDataset(BaseDataset):
+
+    @staticmethod
+    def load(path: str):
+        raw_data = []
+        with open(path, encoding='utf-8') as f:
+            reader = csv.reader(f, delimiter=';')
+            for row in reader:
+                if len(row) < 1:
+                    row = ['']
+                raw_data.append({'input': row[0]})
+        return Dataset.from_list(raw_data)
+
+
+@LOAD_DATASET.register_module()
+class DingoLongDataset(BaseDataset):
+
+    @staticmethod
+    def load(path: str):
+        raw_data = []
+        with open(path, 'r', encoding='utf-8') as f:
+            for line in f:
+                raw_data.append({'input': json.loads(line).get('input')})
+        return Dataset.from_list(raw_data)
+
+
+@ICL_EVALUATORS.register_module()
+class DingoEvaluator(BaseEvaluator):
+
+    def score(self, origin_prompt: List, predictions: List) -> dict:
+        try:
+            # from dingo.model.model import Model
+            from dingo.exec import Executor
+            from dingo.io import InputArgs
+        except Exception:
+            raise ModuleNotFoundError(
+                '=========== '
+                'dingo register fail. please try: pip install dingo-python.'
+                ' ===========')
+
+        current_time = time.strftime('%Y%m%d_%H%M%S', time.localtime())
+        file_data = [{'prompt': pmt, 'prediction': prd}
+                     for pmt, prd in zip(origin_prompt, predictions)]
+        file_name = 'dingo_file_' + current_time + '.jsonl'
+        with open(file_name, 'a', encoding='utf-8') as f:
+            for d in file_data:
+                json.dump(d, f, ensure_ascii=False)
+                f.write('\n')
+
+        input_data = {
+            'eval_models': ['llm_base'],
+            'input_path': file_name,
+            'output_path': './outputs/dingo/',
+            'dataset': 'local',
+            'datasource': 'local',
+            'data_format': 'jsonl',
+            'column_prompt': ['prompt'],
+            'column_content': ['prediction'],
+        }
+        # Model.apply_config(input_data["custom_config_path"])
+        input_args = InputArgs(**input_data)
+        executor = Executor.exec_map['local'](input_args)
+        result = executor.execute()
+        summary = result[0].to_dict()
+
+        os.remove(file_name)
+        return summary
diff --git a/requirements/extra.txt b/requirements/extra.txt
index 21834834..efeef772 100644
--- a/requirements/extra.txt
+++ b/requirements/extra.txt
@@ -1,6 +1,7 @@
 # Alpaca-eval
 alpaca-eval==0.6
 cn2an
+dingo-python
 # Icl topk retriever
 faiss_gpu==1.7.2
 # Humaneval, Humaneval X
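
Usage note (not part of the patch): DingoDataset.load reads a semicolon-delimited CSV and keeps only the first column of each row as the prompt, while DingoEvaluator hands the model outputs to the external dingo-python package. Below is a minimal sketch of the loader's expected input; the temporary file and the two sample prompts are made up for illustration and merely stand in for en_192.csv / zh_170.csv.

# Illustrative sketch only: exercise DingoDataset.load on a tiny
# ';'-delimited CSV with one prompt per row (first column is used as 'input').
import csv
import tempfile

from opencompass.datasets import DingoDataset

with tempfile.NamedTemporaryFile('w', suffix='.csv', delete=False,
                                 encoding='utf-8', newline='') as f:
    writer = csv.writer(f, delimiter=';')
    writer.writerow(['Explain what a unified diff is.'])
    writer.writerow(['Introduce OpenCompass in one sentence.'])
    sample_path = f.name

ds = DingoDataset.load(sample_path)   # Dataset with a single 'input' column
print(len(ds), ds[0]['input'])        # -> 2 Explain what a unified diff is.

The end-to-end evaluation would then be launched with the new config in the usual OpenCompass way, e.g. python run.py configs/eval_dingo.py (assuming the standard run.py entry point), with results written under ./outputs/eval_dingo as set by work_dir.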