[Update] Dingo Dataset update (#1670)

* [Update] Dingo Dataset update

* update
This commit is contained in:
Linchen Xiao 2024-11-08 14:38:43 +08:00 committed by GitHub
parent 835bf75a36
commit a0ef2fd3b4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 21 additions and 15 deletions

View File

@@ -10,6 +10,7 @@ from datasets import Dataset
from opencompass.openicl.icl_evaluator import BaseEvaluator from opencompass.openicl.icl_evaluator import BaseEvaluator
from opencompass.registry import ICL_EVALUATORS, LOAD_DATASET from opencompass.registry import ICL_EVALUATORS, LOAD_DATASET
from opencompass.utils import get_data_path
from .base import BaseDataset from .base import BaseDataset
@@ -19,6 +20,7 @@ class DingoDataset(BaseDataset):
@staticmethod @staticmethod
def load(path: str): def load(path: str):
path = get_data_path(path, local_mode=True)
raw_data = [] raw_data = []
with open(path, encoding='utf-8') as f: with open(path, encoding='utf-8') as f:
reader = csv.reader(f, delimiter=';') reader = csv.reader(f, delimiter=';')
@@ -34,6 +36,7 @@ class DingoLongDataset(BaseDataset):
@staticmethod @staticmethod
def load(path: str): def load(path: str):
path = get_data_path(path, local_mode=True)
raw_data = [] raw_data = []
with open(path, 'r', encoding='utf-8') as f: with open(path, 'r', encoding='utf-8') as f:
for line in f: for line in f:
@@ -46,7 +49,6 @@ class DingoEvaluator(BaseEvaluator):
def score(self, origin_prompt: List, predictions: List) -> dict: def score(self, origin_prompt: List, predictions: List) -> dict:
try: try:
# from dingo.model.model import Model
from dingo.exec import Executor from dingo.exec import Executor
from dingo.io import InputArgs from dingo.io import InputArgs
except Exception: except Exception:
@@ -58,27 +60,30 @@ class DingoEvaluator(BaseEvaluator):
current_time = time.strftime('%Y%m%d_%H%M%S', time.localtime()) current_time = time.strftime('%Y%m%d_%H%M%S', time.localtime())
file_data = [{'prompt': pmt, 'prediction': prd} file_data = [{'prompt': pmt, 'prediction': prd}
for pmt, prd in zip(origin_prompt, predictions)] for pmt, prd in zip(origin_prompt, predictions)]
file_name = 'dingo_file_' + current_time + '.jsonl' os.makedirs('tmp', exist_ok=True)
file_name = os.path.join('tmp', 'dingo_file_' + current_time + '.jsonl') # noqa: E501
with open(file_name, 'a', encoding='utf-8') as f: with open(file_name, 'a', encoding='utf-8') as f:
for d in file_data: for d in file_data:
json.dump(d, f, ensure_ascii=False) json.dump(d, f, ensure_ascii=False)
f.write('\n') f.write('\n')
input_data = { input_data = {
'eval_models': ['llm_base'], 'eval_model': 'llm_base',
'input_path': file_name, 'input_path': file_name,
'output_path': './outputs/dingo/', 'output_path': './outputs/dingo/',
'save_data': True,
'dataset': 'local', 'dataset': 'local',
'datasource': 'local',
'data_format': 'jsonl', 'data_format': 'jsonl',
'column_prompt': ['prompt'], 'column_prompt': 'prompt',
'column_content': ['prediction'], 'column_content': 'prediction',
} }
# Model.apply_config(input_data["custom_config_path"]) try:
input_args = InputArgs(**input_data) input_args = InputArgs(**input_data)
executor = Executor.exec_map['local'](input_args) executor = Executor.exec_map['local'](input_args)
result = executor.execute() result = executor.execute()
summary = result[0].to_dict() summary = result[0].to_dict()
except Exception:
os.remove(file_name) raise
finally:
os.remove(file_name)
return summary return summary

View File

@@ -1,7 +1,8 @@
# Alpaca-eval # Alpaca-eval
alpaca-eval==0.6 alpaca-eval==0.6
cn2an cn2an
dingo-python # Dingo
dingo-python==1.1.2
# Icl topk retriever # Icl topk retriever
faiss_gpu==1.7.2 faiss_gpu==1.7.2
# Humaneval, Humaneval X # Humaneval, Humaneval X