mirror of https://github.com/open-compass/opencompass.git (synced 2025-05-30 16:03:24 +08:00)
[Feature] Add kaoshi dataset (#392)
* Add ToT method
* Update ToT
* Update ToT
* Update ToT
* Update ToT
* Update ToT
* Add Kaoshi
* Update Kaoshi
* Update Kaoshi
* Update kaoshi
* Update kaoshi
* Update Kaoshi
* Update Kaoshi
* Update Kaoshi
* Update Kaoshi
* update Kaoshi
* update
* update
* fix

---------

Co-authored-by: gaotongxiao <gaotongxiao@gmail.com>
This commit is contained in:
parent 2a62bea1a4
commit c5224c2a91
configs/datasets/kaoshi/kaoshi_gen.py (new file, 4 lines)
@@ -0,0 +1,4 @@
from mmengine.config import read_base

with read_base():
    from .kaoshi_gen_86aca2 import kaoshi_datasets  # noqa: F401, F403
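Once this lands, the dataset list can be pulled into an evaluation config through the usual read_base mechanism. A minimal sketch; the model import below is a placeholder for whatever model config exists locally and is not part of this commit:

from mmengine.config import read_base

with read_base():
    from .datasets.kaoshi.kaoshi_gen import kaoshi_datasets
    # Placeholder: substitute any model config available in your config tree.
    from .models.hf_internlm.hf_internlm_7b import models

datasets = [*kaoshi_datasets]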
configs/datasets/kaoshi/kaoshi_gen_86aca2.py (new file, 76 lines)
@@ -0,0 +1,76 @@
from opencompass.datasets import KaoshiDataset, KaoshiEvaluator
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever

prompts = {
    "单选题": "请你做一道单项选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从A,B,C,D中选出正确的答案,并写在【答案】和<eoa>之间,答案应只包含最终结果,不要添加额外词语。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:",
"多选题" : "请你做一道多项选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从多个选项中选出正确的答案,答案可能是一个到多个选项,奇怪将其写在【答案】和<eoa>之间,答案应只包含最终结果,不要添加额外词语。\n例如:【答案】: A D <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:",
    "填空题": "请解答下面的填空题\n仔细阅读题目,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间,答案应只包含最终结果,不要添加额外词语。\n完整的题目回答格式如下:\n【解析】 ... <eoe>\n【答案】... <eoa>\n请你严格按照上述格式作答。\n题目如下:",
    "完形填空": "请你做一道英语完形填空题,其中包含二十个小题。\n请你一步一步思考。每一题你将从A,B,C,D中选出正确的答案,并写在【答案】和<eoa>之间。\n例如:(1)【答案】 A <eoa>\n(2)【答案】 B <eoa>\n请你严格按照上述格式作答。\n",
    "七选五": "请回答下面的问题,将符合题意的五个选项的字母写在【答案】和<eoa>之间,例如:【答案】 A B C D E <eoa>\n请严格按照上述格式作答。题目如下:\n",
    "判断题": "请回答下面的判断题,将你的判断结果写在【答案】和<eoa>之间,若给定表述正确时回答:\n【答案】正确 <eoa>\n 表述错误时回答:\n【答案】错误 <eoa>\n请严格按照上述格式作答。题目如下:\n",
}

splits_with_type = {
    '单选题': ['职业-消防', '职业-测绘', '考研-经济', '职业-安全工程', '考研-政治', '职业-建筑', '考研-英语', '职业-教师资格', '职业-证券', '职业-会计', '职业-公务员', '考研-数学', '职业-高项', '考研-临床医学', '职业-银行', '考研-管理类综合', '职业-基金'],
    '多选题': ['职业-消防', '职业-测绘', '考研-政治', '职业-建筑', '职业-证券', '职业-会计', '考研-临床医学', '职业-银行'],
    '完形填空': ['考研-英语'],
    '七选五': ['考研-英语'],
    '判断题': ['职业-证券'],
    '填空题': ['考研-数学'],
}

zh2en = {
    '单选题': 'single_choice',
    '多选题': 'multi_choice',
    '完形填空': 'multi_question_choice',
    '判断题': 'judgment',
    '填空题': 'cloze',
    '七选五': 'five_out_of_seven',
}

kaoshi_datasets = []

for _type in list(splits_with_type.keys()):
    for _split in splits_with_type[_type]:
        _folder = _split.replace('-' + _type, '')
        _p = prompts[_type]
        _reader_cfg = {
            "input_columns": ['question'],
            "output_column": 'answer',
        }
        _infer_cfg = {
            "ice_template": {
                "type": PromptTemplate,
                "template": {
                    "round": [{
                        "role": "HUMAN",
                        "prompt": _p + '{question}'
                    }]
                },
                "ice_token": "</E>"
            },
            "retriever": {
                "type": ZeroRetriever
            },
            "inferencer": {
                "type": GenInferencer,
                "max_out_len": 1024,
            }
        }
        _eval_cfg = {
            "evaluator": {
                "type": KaoshiEvaluator,
                "question_type": zh2en[_type],
            },
            "pred_role": "BOT",
        }
        _base_path = './data/Kaoshi'
        _dataset = {
            "type": KaoshiDataset,
            "abbr": "Kaoshi" + _split + '-' + _type,
            "path": _base_path + '/' + _folder + '/' + _type + ".jsonl",
            "name": zh2en[_type],
            "reader_cfg": _reader_cfg,
            "infer_cfg": _infer_cfg,
            "eval_cfg": _eval_cfg,
        }

        kaoshi_datasets.append(_dataset)

_temporary_variables = [k for k in globals() if k.startswith('_')]
for _t in _temporary_variables:
    del globals()[_t]
del _temporary_variables, _t
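The loop above generates one dataset entry per (question type, split) pair and points each at ./data/Kaoshi/<split>/<题型>.jsonl. The data files themselves are not part of this commit; judging from KaoshiDataset.load further down, a single-choice record presumably looks roughly like the sketch below, with all field values invented for illustration:

import json

# Hypothetical 单选题 record: `question` and `options` are consumed by
# KaoshiDataset.load, and `answer` is what KaoshiEvaluator scores against.
sample = {
    "question": "下列说法正确的是?",
    "options": ["说法一", "说法二", "说法三", "说法四"],
    "answer": ["A"],
}
print(json.dumps(sample, ensure_ascii=False))  # one line of 职业-消防/单选题.jsonl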
opencompass/datasets/__init__.py (modified)
@@ -38,6 +38,7 @@ from .humaneval import *  # noqa: F401, F403
 from .humanevalx import *  # noqa: F401, F403
 from .iwslt2017 import *  # noqa: F401, F403
 from .jigsawmultilingual import *  # noqa: F401, F403
+from .kaoshi import KaoshiDataset, KaoshiEvaluator  # noqa: F401, F403
 from .lambada import *  # noqa: F401, F403
 from .lcsts import *  # noqa: F401, F403
 from .leval import *  # noqa: F401, F403
opencompass/datasets/kaoshi.py (new file, 138 lines)
@@ -0,0 +1,138 @@
import json
import re

from datasets import Dataset

from opencompass.openicl.icl_evaluator import BaseEvaluator

from .base import BaseDataset


def get_number(options):
    result_string = ''
    for i, option in enumerate(options, start=65):
        result_string += f'{chr(i)}. {option}\n'
    return result_string


class KaoshiDataset(BaseDataset):

    @staticmethod
    def load(path: str, name: str):
        data_list = []
        with open(path, encoding='utf-8') as f:
            for line in f:
                data = json.loads(line)
                if name in ['single_choice', 'multi_choice']:
                    data['question'] = data['question'].strip(
                    ) + '\n' + get_number(data['options'])
                data_list.append(data)
        return Dataset.from_list(data_list)


valid_kaoshi_question_types = [
    'single_choice', 'multi_choice', 'multi_question_choice',
    'five_out_of_seven', 'cloze', 'judgment'
]


class KaoshiEvaluator(BaseEvaluator):

    def __init__(self, question_type) -> None:
        super().__init__()
        assert question_type in valid_kaoshi_question_types
        self.question_type = question_type

    def do_predictions_postprocess(self, model_output, answer_lenth=None):
        if self.question_type == 'single_choice':
            model_answer = []
            temp = re.findall(r'[A-D]', model_output[::-1])
            if len(temp) != 0:
                model_answer.append(temp[0])

        elif self.question_type == 'multi_question_choice':
            model_answer = []
            temp = re.findall(r'【答案】\s*[::]*\s*[A-Z]', model_output)

            if len(temp) == answer_lenth:
                for t in temp:
                    model_answer.append(re.findall(r'[A-Z]', t)[0])
            else:
                temp = re.findall(r'[A-Z]', model_output)
                if len(temp) > 0:
                    for k in range(min(len(temp), answer_lenth)):
                        model_answer.append(temp[k])

        elif self.question_type == 'multi_choice':
            model_answer = []
            answer = ''
            content = re.sub(r'\s+', '', model_output)
            answer_index = content.find('【答案】')
            if answer_index > 0:
                temp = content[answer_index:]
                if len(re.findall(r'[A-D]', temp)) > 0:
                    for t in re.findall(r'[A-D]', temp):
                        answer += t
            else:
                temp = content[-10:]
                if len(re.findall(r'[A-D]', temp)) > 0:
                    for t in re.findall(r'[A-D]', temp):
                        answer += t
            if len(answer) != 0:
                model_answer.append(answer)

        elif self.question_type == 'five_out_of_seven':
            model_answer = []
            temp = re.findall(r'[A-G]', model_output)
            if len(temp) > 0:
                for k in range(min(5, len(temp))):
                    model_answer.append(temp[k])

        elif self.question_type in ['cloze', 'judgment']:
            model_answer = []
            temp = re.findall(r'【答案】(.*?) ', model_output)
            if len(temp) > 0:
                model_answer.append(temp[0])

        return model_answer

    def ensure_same_length(self, pred, refr):
        if len(pred) == len(refr):
            return pred
        return ['Z'] * len(refr)

    def score(self, predictions, references):
        if self.question_type not in valid_kaoshi_question_types:
            return {'score': 100}
        elif self.question_type == 'multi_choice':
            correct_score, total_score = 0, 0
            for pred, refr in zip(predictions, references):
                pred = self.do_predictions_postprocess(pred)
                pred = self.ensure_same_length(pred, refr)
                for p, r in zip(pred, refr):
                    if p == r:
                        correct_score += 2
                    else:
                        for i in p:
                            if i not in r:
                                break
                        else:
                            correct_score += 1
                    total_score += 2
            return {'score': correct_score / total_score * 100}
        else:
            correct_score, total_score = 0, 0
            for pred, refr in zip(predictions, references):
                if self.question_type == 'multi_question_choice':
                    pred = self.do_predictions_postprocess(pred, len(refr))
                else:
                    pred = self.do_predictions_postprocess(pred)
                if self.question_type in ['cloze', 'judgment']:
                    refr = [refr]
                pred = self.ensure_same_length(pred, refr)
                for p, r in zip(pred, refr):
                    if p == r:
                        correct_score += 1
                    total_score += 1
            return {'score': correct_score / total_score * 100}
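As a quick sanity check, the evaluator can be exercised directly. The model outputs and references below are invented for illustration; the real reference format is defined by the dataset files, which are not shipped in this commit:

from opencompass.datasets import KaoshiEvaluator

# Single choice: the postprocessor keeps the last A-D letter in the output.
single = KaoshiEvaluator(question_type='single_choice')
print(single.score(predictions=['【解析】略 <eoe>\n【答案】 B <eoa>'],
                   references=[['B']]))  # {'score': 100.0}

# Multiple choice: an exact match earns 2 points, a strict subset earns 1.
multi = KaoshiEvaluator(question_type='multi_choice')
print(multi.score(predictions=['【解析】略 <eoe>\n【答案】 A B <eoa>'],
                  references=[['ABD']]))  # {'score': 50.0}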