From c5224c2a91e71d185a00ddcd5358dced0d75bb2e Mon Sep 17 00:00:00 2001
From: liushz
Date: Fri, 22 Sep 2023 18:46:33 +0800
Subject: [PATCH] [Feature] Add kaoshi dataset (#392)

* Add ToT method

* Update ToT

* Update ToT

* Update ToT

* Update ToT

* Update ToT

* Add Koashi

* Update Kaoshi

* Update Kaoshi

* Update kaoshi

* Update kaoshi

* Update Kaoshi

* Update Kaoshi

* Update Kaoshi

* Update Kaoshi

* update Kaoshi

* update

* update

* fix

---------

Co-authored-by: gaotongxiao
---
 configs/datasets/kaoshi/kaoshi_gen.py        |   4 +
 configs/datasets/kaoshi/kaoshi_gen_86aca2.py |  76 ++++++++++
 opencompass/datasets/__init__.py             |   1 +
 opencompass/datasets/kaoshi.py               | 138 +++++++++++++++++++
 4 files changed, 219 insertions(+)
 create mode 100644 configs/datasets/kaoshi/kaoshi_gen.py
 create mode 100644 configs/datasets/kaoshi/kaoshi_gen_86aca2.py
 create mode 100644 opencompass/datasets/kaoshi.py

diff --git a/configs/datasets/kaoshi/kaoshi_gen.py b/configs/datasets/kaoshi/kaoshi_gen.py
new file mode 100644
index 00000000..b848f09d
--- /dev/null
+++ b/configs/datasets/kaoshi/kaoshi_gen.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+    from .kaoshi_gen_86aca2 import kaoshi_datasets  # noqa: F401, F403
diff --git a/configs/datasets/kaoshi/kaoshi_gen_86aca2.py b/configs/datasets/kaoshi/kaoshi_gen_86aca2.py
new file mode 100644
index 00000000..4b330264
--- /dev/null
+++ b/configs/datasets/kaoshi/kaoshi_gen_86aca2.py
@@ -0,0 +1,76 @@
+from opencompass.datasets import KaoshiDataset, KaoshiEvaluator
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+
+prompts = {
+    "单选题" : "请你做一道单项选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从A,B,C,D中选出正确的答案,并写在【答案】和<eoa>之间,答案应只包含最终结果,不要添加额外词语。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:",
+    "多选题" : "请你做一道多项选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从多个选项中选出正确的答案,答案可能是一个到多个选项,请将其写在【答案】和<eoa>之间,答案应只包含最终结果,不要添加额外词语。\n例如:【答案】: A D <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:",
+    "填空题" : "请解答下面的填空题\n仔细阅读题目,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间,答案应只包含最终结果,不要添加额外词语。\n完整的题目回答格式如下:\n【解析】 ... <eoe>\n【答案】... <eoa>\n请你严格按照上述格式作答。\n题目如下:",
+    "完形填空" : "请你做一道英语完形填空题,其中包含二十个小题。\n请你一步一步思考。每一题你将从A,B,C,D中选出正确的答案,并写在【答案】和<eoa>之间。\n例如:(1)【答案】 A <eoa>\n(2)【答案】 B <eoa>\n请你严格按照上述格式作答。\n",
+    "七选五": "请回答下面的问题,将符合题意的五个选项的字母写在【答案】和<eoa>之间,例如:【答案】 A B C D E <eoa>\n请严格按照上述格式作答。题目如下:\n",
+    "判断题" : "请回答下面的判断题,将你的判断结果写在【答案】和<eoa>之间,若给定表述正确时回答:\n【答案】正确 <eoa>\n 表述错误时回答:\n【答案】错误 <eoa>\n请严格按照上述格式作答。题目如下:\n",
+}
+
+splits_with_type = {'单选题': ['职业-消防', '职业-测绘', '考研-经济', '职业-安全工程', '考研-政治', '职业-建筑', '考研-英语', '职业-教师资格', '职业-证券', '职业-会计', '职业-公务员', '考研-数学', '职业-高项', '考研-临床医学', '职业-银行', '考研-管理类综合', '职业-基金'],
+                    '多选题': ['职业-消防', '职业-测绘', '考研-政治', '职业-建筑', '职业-证券', '职业-会计', '考研-临床医学', '职业-银行'],
+                    '完形填空': ['考研-英语'],
+                    '七选五': ['考研-英语'],
+                    '判断题': ['职业-证券'],
+                    '填空题': ['考研-数学']}
+
+zh2en = {'单选题': 'single_choice', '多选题': 'multi_choice', '完形填空': 'multi_question_choice', '判断题': 'judgment', '填空题': 'cloze', '七选五': 'five_out_of_seven'}
+
+kaoshi_datasets = []
+
+for _type in list(splits_with_type.keys()):
+    for _split in splits_with_type[_type]:
+        _folder = _split.replace('-' + _type, '')
+        _p = prompts[_type]
+        _reader_cfg = {
+            "input_columns": ['question'],
+            "output_column": 'answer',
+        }
+        _infer_cfg = {
+            "ice_template": {
+                "type": PromptTemplate,
+                "template": {
+                    "round": [{
+                        "role": "HUMAN",
+                        "prompt": _p + '{question}'
+                    }]
+                },
+                "ice_token": "</E>"
+            },
+            "retriever": {
+                "type": ZeroRetriever
+            },
+            "inferencer": {
+                "type": GenInferencer,
+                "max_out_len": 1024,
+            }
+        }
+        _eval_cfg = {
+            "evaluator": {
+                "type": KaoshiEvaluator,
+                "question_type": zh2en[_type],
+            },
+            "pred_role": "BOT",
+        }
+        _base_path = './data/Kaoshi'
+        _dataset = {
+            "type": KaoshiDataset,
+            "abbr": "Kaoshi" + _split + '-' + _type,
+            "path": _base_path + '/' + _folder + '/' + _type + ".jsonl",
+            "name": zh2en[_type],
+            "reader_cfg": _reader_cfg,
+            "infer_cfg": _infer_cfg,
+            "eval_cfg": _eval_cfg,
+        }
+
+        kaoshi_datasets.append(_dataset)
+
+_temporary_variables = [k for k in globals() if k.startswith('_')]
+for _t in _temporary_variables:
+    del globals()[_t]
+del _temporary_variables, _t
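
A quick end-to-end smoke test for the new configs (a sketch only, not part of the patch: the model import is a placeholder for whichever model config your checkout ships, and it assumes the Kaoshi data has been downloaded to ./data/Kaoshi, the path the config above hard-codes):

    from mmengine.config import read_base

    with read_base():
        # dataset configs added by this patch
        from .datasets.kaoshi.kaoshi_gen import kaoshi_datasets
        # placeholder -- swap in any model config present in your checkout
        from .models.hf_internlm.hf_internlm_7b import models

    datasets = kaoshi_datasets

Saved as configs/eval_kaoshi.py, this should run with "python run.py configs/eval_kaoshi.py".
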
diff --git a/opencompass/datasets/__init__.py b/opencompass/datasets/__init__.py
index 566e8bdc..dd273fbc 100644
--- a/opencompass/datasets/__init__.py
+++ b/opencompass/datasets/__init__.py
@@ -38,6 +38,7 @@ from .humaneval import *  # noqa: F401, F403
 from .humanevalx import *  # noqa: F401, F403
 from .iwslt2017 import *  # noqa: F401, F403
 from .jigsawmultilingual import *  # noqa: F401, F403
+from .kaoshi import KaoshiDataset, KaoshiEvaluator  # noqa: F401
 from .lambada import *  # noqa: F401, F403
 from .lcsts import *  # noqa: F401, F403
 from .leval import *  # noqa: F401, F403
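
For context on the data layout: each <subject>/<question-type>.jsonl file read by the loader in the next diff holds one JSON object per line with a question, an answer and, for choice questions, an options list. A made-up single-choice record (all field values invented for illustration):

    {"question": "我国消防工作的方针是什么?", "options": ["预防为主,防消结合", "救人第一", "科学施救", "统一指挥"], "answer": "A"}

KaoshiDataset.load() below appends the lettered options to the question, so the model is shown "A. 预防为主,防消结合" and so on after the question text.
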
diff --git a/opencompass/datasets/kaoshi.py b/opencompass/datasets/kaoshi.py
new file mode 100644
index 00000000..96a7d083
--- /dev/null
+++ b/opencompass/datasets/kaoshi.py
@@ -0,0 +1,138 @@
+import json
+import re
+
+from datasets import Dataset
+
+from opencompass.openicl.icl_evaluator import BaseEvaluator
+
+from .base import BaseDataset
+
+
+def get_number(options):
+
+    result_string = ''
+    for i, option in enumerate(options, start=65):
+        result_string += f'{chr(i)}. {option}\n'
+    return result_string
+
+
+class KaoshiDataset(BaseDataset):
+
+    @staticmethod
+    def load(path: str, name: str):
+        data_list = []
+        with open(path, encoding='utf-8') as f:
+            for line in f:
+                data = json.loads(line)
+                if name in ['single_choice', 'multi_choice']:
+                    data['question'] = data['question'].strip(
+                    ) + '\n' + get_number(data['options'])
+                data_list.append(data)
+        return Dataset.from_list(data_list)
+
+
+valid_kaoshi_question_types = [
+    'single_choice', 'multi_choice', 'multi_question_choice',
+    'five_out_of_seven', 'cloze', 'judgment'
+]
+
+
+class KaoshiEvaluator(BaseEvaluator):
+
+    def __init__(self, question_type) -> None:
+        super().__init__()
+        assert question_type in valid_kaoshi_question_types
+        self.question_type = question_type
+
+    def do_predictions_postprocess(self, model_output, answer_length=None):
+        if self.question_type == 'single_choice':
+            model_answer = []
+            temp = re.findall(r'[A-D]', model_output[::-1])
+            if len(temp) != 0:
+                model_answer.append(temp[0])
+
+        elif self.question_type == 'multi_question_choice':
+            model_answer = []
+            temp = re.findall(r'【答案】\s*[::]*\s*[A-Z]', model_output)
+
+            if len(temp) == answer_length:
+                for t in temp:
+                    model_answer.append(re.findall(r'[A-Z]', t)[0])
+            else:
+                temp = re.findall(r'[A-Z]', model_output)
+                if len(temp) > 0:
+                    for k in range(min(len(temp), answer_length)):
+                        model_answer.append(temp[k])
+
+        elif self.question_type == 'multi_choice':
+            model_answer = []
+            answer = ''
+            content = re.sub(r'\s+', '', model_output)
+            answer_index = content.find('【答案】')
+            if answer_index >= 0:
+                temp = content[answer_index:]
+                if len(re.findall(r'[A-D]', temp)) > 0:
+                    for t in re.findall(r'[A-D]', temp):
+                        answer += t
+            else:
+                temp = content[-10:]
+                if len(re.findall(r'[A-D]', temp)) > 0:
+                    for t in re.findall(r'[A-D]', temp):
+                        answer += t
+            if len(answer) != 0:
+                model_answer.append(answer)
+
+        elif self.question_type == 'five_out_of_seven':
+            model_answer = []
+            temp = re.findall(r'[A-G]', model_output)
+            if len(temp) > 0:
+                for k in range(min(5, len(temp))):
+                    model_answer.append(temp[k])
+
+        elif self.question_type in ['cloze', 'judgment']:
+            model_answer = []
+            temp = re.findall(r'【答案】(.*?) ', model_output)
+            if len(temp) > 0:
+                model_answer.append(temp[0])
+
+        return model_answer
+
+    def ensure_same_length(self, pred, refr):
+        if len(pred) == len(refr):
+            return pred
+        return ['Z'] * len(refr)
+
+    def score(self, predictions, references):
+        if self.question_type not in valid_kaoshi_question_types:
+            return {'score': 100}
+        elif self.question_type == 'multi_choice':
+            correct_score, total_score = 0, 0
+            for pred, refr in zip(predictions, references):
+                pred = self.do_predictions_postprocess(pred)
+                pred = self.ensure_same_length(pred, refr)
+                for p, r in zip(pred, refr):
+                    if p == r:
+                        correct_score += 2
+                    else:
+                        for i in p:
+                            if i not in r:
+                                break
+                        else:
+                            correct_score += 1
+                    total_score += 2
+            return {'score': correct_score / total_score * 100}
+        else:
+            correct_score, total_score = 0, 0
+            for pred, refr in zip(predictions, references):
+                if self.question_type == 'multi_question_choice':
+                    pred = self.do_predictions_postprocess(pred, len(refr))
+                else:
+                    pred = self.do_predictions_postprocess(pred)
+                if self.question_type in ['cloze', 'judgment']:
+                    refr = [refr]
+                pred = self.ensure_same_length(pred, refr)
+                for p, r in zip(pred, refr):
+                    if p == r:
+                        correct_score += 1
+                    total_score += 1
+            return {'score': correct_score / total_score * 100}
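
A small self-contained check of the extraction and scoring logic (the sample model outputs are invented; it only assumes an environment with this patch applied):

    from opencompass.datasets import KaoshiEvaluator

    # Single choice: the post-processor scans the output back to front
    # for the last A-D letter, so the final "B" is taken as the answer.
    evaluator = KaoshiEvaluator(question_type='single_choice')
    preds = ['【解析】 排除法可知其余选项错误。\n【答案】 B <eoa>']
    refs = [['B']]
    print(evaluator.score(preds, refs))  # {'score': 100.0}

    # Multi choice gives partial credit: predicting a strict subset of
    # the reference letters earns 1 of the 2 points for the question.
    evaluator = KaoshiEvaluator(question_type='multi_choice')
    print(evaluator.score(['【答案】 AB <eoa>'], [['ABD']]))  # {'score': 50.0}
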