OpenCompass/opencompass/datasets/mgsm.py

import re

from datasets import Dataset

from opencompass.openicl.icl_evaluator import BaseEvaluator
from opencompass.registry import LOAD_DATASET
from opencompass.utils import get_data_path

from .base import BaseDataset


@LOAD_DATASET.register_module()
class MGSMSDataset(BaseDataset):
    """Dataset loader for tab-separated question/answer files."""

    @staticmethod
    def load(path: str):
        """Read a TSV file into a ``Dataset`` of questions and answers.

        Every non-blank line must contain exactly one tab separating the
        question from its answer.

        Args:
            path: Dataset location; it is passed through ``get_data_path``
                (with ``local_mode=True``) before the file is opened.

        Returns:
            A ``Dataset`` with two string columns, ``question`` and
            ``answer``, in file order.

        Raises:
            ValueError: If a non-blank line does not split into exactly
                two tab-separated fields.
        """
        path = get_data_path(path, local_mode=True)
        questions, answers = [], []
        # The context manager guarantees the handle is closed even when a
        # malformed line makes the unpacking below raise.
        with open(path, 'r', encoding='utf-8') as src_file:
            for line in src_file:
                record = line.strip()
                if not record:
                    # Blank lines (e.g. a trailing newline at end of file)
                    # carry no sample and would break the unpack below.
                    continue
                question, answer = record.split('\t')
                questions.append(question)
                answers.append(answer)

        return Dataset.from_dict({'question': questions, 'answer': answers})


# Word that introduces the final answer in a model response, keyed by
# two-letter language code. mgsm_postprocess only looks for numbers in the
# text that follows this word.
LANG_TO_ANSWER_PREFIX = {
    'en': 'Answer',  # English
    'bn': 'উত্তর',  # Bengali
    'de': 'Antwort',  # German
    'es': 'Respuesta',  # Spanish
    'fr': 'Réponse',  # French
    'ja': '答え',  # Japanese
    'ru': 'Ответ',  # Russian
    'sw': 'Jibu',  # Swahili
    'te': 'సమాధానం',  # Telugu
    'th': 'คำตอบ',  # Thai
    'zh': '答案',  # Chinese
}


def mgsm_postprocess(text: str, lang: str) -> str:
    """Extract the final numeric answer from a model response.

    Args:
        text: Raw model output.
        lang: Key into ``LANG_TO_ANSWER_PREFIX`` selecting the answer
            marker to search for.

    Returns:
        The last number found after the last occurrence of the marker,
        with thousands commas removed and any trailing period dropped.
        An empty string if the marker is absent or no number follows it.

    Raises:
        KeyError: If ``lang`` is not a key of ``LANG_TO_ANSWER_PREFIX``.
    """
    marker = LANG_TO_ANSWER_PREFIX[lang]
    segments = text.split(marker)
    if len(segments) == 1:
        # A single segment means the marker never occurred in the text.
        return ''

    # Only what follows the last marker counts; commas are removed so that
    # "1,234" is read as a single number.
    tail = segments[-1].strip().replace(',', '')
    matches = re.findall(r'\d+\.?\d*', tail)
    if not matches:
        return ''
    # The pattern can swallow a sentence-ending period ("42."); trim it.
    return matches[-1].rstrip('.')


class MGSM_Evaluator(BaseEvaluator):
    """Exact-match accuracy evaluator for post-processed predictions."""

    def score(self, predictions, references):
        """Compare predictions with references by equality.

        Args:
            predictions: Sequence of predicted answers.
            references: Sequence of reference answers, the same length as
                ``predictions``.

        Returns:
            A dict with ``accuracy`` (percentage of pairs that compare
            equal, ``0.0`` when there are no pairs) and ``details``, which
            maps each sample index (as a string) to its reference,
            prediction and correctness flag.

        Raises:
            AssertionError: If the two sequences differ in length.
        """
        assert len(predictions) == len(references)

        num_correct = 0
        details = {}
        for index, (reference, prediction) in enumerate(
                zip(references, predictions)):
            is_correct = bool(reference == prediction)
            num_correct += is_correct
            details[str(index)] = {
                'references': reference,
                'predictions': prediction,
                'correct': is_correct,
            }

        total = len(details)
        # An empty evaluation set would otherwise divide by zero.
        accuracy = num_correct / total * 100 if total else 0.0
        return {'accuracy': accuracy, 'details': details}
add mgsm datasets (#1081) * add mgsm datasets * fix lint * fix lint * update mgsm * update mgsm * ease code spell * update * update * update --------- Co-authored-by: Leymore <zfz-960727@163.com> 2024-05-06 15:29:34 +08:00			`import re`

			`from datasets import Dataset`

			`from opencompass.openicl.icl_evaluator import BaseEvaluator`
			`from opencompass.registry import LOAD_DATASET`
[Feature] Support ModelScope datasets (#1289) * add ceval, gsm8k modelscope surpport * update race, mmlu, arc, cmmlu, commonsenseqa, humaneval and unittest * update bbh, flores, obqa, siqa, storycloze, summedits, winogrande, xsum datasets * format file * format file * update dataset format * support ms_dataset * udpate dataset for modelscope support * merge myl_dev and update test_ms_dataset * udpate dataset for modelscope support * update readme * update eval_api_zhipu_v2 * remove unused code * add get_data_path function * update readme * remove tydiqa japanese subset * add ceval, gsm8k modelscope surpport * update race, mmlu, arc, cmmlu, commonsenseqa, humaneval and unittest * update bbh, flores, obqa, siqa, storycloze, summedits, winogrande, xsum datasets * format file * format file * update dataset format * support ms_dataset * udpate dataset for modelscope support * merge myl_dev and update test_ms_dataset * update readme * udpate dataset for modelscope support * update eval_api_zhipu_v2 * remove unused code * add get_data_path function * remove tydiqa japanese subset * update util * remove .DS_Store * fix md format * move util into package * update docs/get_started.md * restore eval_api_zhipu_v2.py, add environment setting * Update dataset * Update * Update * Update * Update --------- Co-authored-by: Yun lin <yunlin@U-Q9X2K4QV-1904.local> Co-authored-by: Yunnglin <mao.looper@qq.com> Co-authored-by: Yun lin <yunlin@laptop.local> Co-authored-by: Yunnglin <maoyl@smail.nju.edu.cn> Co-authored-by: zhangsongyang <zhangsongyang@pjlab.org.cn> 2024-07-29 13:48:32 +08:00			`from opencompass.utils import get_data_path`
add mgsm datasets (#1081) * add mgsm datasets * fix lint * fix lint * update mgsm * update mgsm * ease code spell * update * update * update --------- Co-authored-by: Leymore <zfz-960727@163.com> 2024-05-06 15:29:34 +08:00
			`from .base import BaseDataset`


			`@LOAD_DATASET.register_module()`
			`class MGSMSDataset(BaseDataset):`

			`@staticmethod`
			`def load(path: str):`
[Feature] Support ModelScope datasets (#1289) * add ceval, gsm8k modelscope surpport * update race, mmlu, arc, cmmlu, commonsenseqa, humaneval and unittest * update bbh, flores, obqa, siqa, storycloze, summedits, winogrande, xsum datasets * format file * format file * update dataset format * support ms_dataset * udpate dataset for modelscope support * merge myl_dev and update test_ms_dataset * udpate dataset for modelscope support * update readme * update eval_api_zhipu_v2 * remove unused code * add get_data_path function * update readme * remove tydiqa japanese subset * add ceval, gsm8k modelscope surpport * update race, mmlu, arc, cmmlu, commonsenseqa, humaneval and unittest * update bbh, flores, obqa, siqa, storycloze, summedits, winogrande, xsum datasets * format file * format file * update dataset format * support ms_dataset * udpate dataset for modelscope support * merge myl_dev and update test_ms_dataset * update readme * udpate dataset for modelscope support * update eval_api_zhipu_v2 * remove unused code * add get_data_path function * remove tydiqa japanese subset * update util * remove .DS_Store * fix md format * move util into package * update docs/get_started.md * restore eval_api_zhipu_v2.py, add environment setting * Update dataset * Update * Update * Update * Update --------- Co-authored-by: Yun lin <yunlin@U-Q9X2K4QV-1904.local> Co-authored-by: Yunnglin <mao.looper@qq.com> Co-authored-by: Yun lin <yunlin@laptop.local> Co-authored-by: Yunnglin <maoyl@smail.nju.edu.cn> Co-authored-by: zhangsongyang <zhangsongyang@pjlab.org.cn> 2024-07-29 13:48:32 +08:00			`path = get_data_path(path, local_mode=True)`
add mgsm datasets (#1081) * add mgsm datasets * fix lint * fix lint * update mgsm * update mgsm * ease code spell * update * update * update --------- Co-authored-by: Leymore <zfz-960727@163.com> 2024-05-06 15:29:34 +08:00			`src_lines = open(path, 'r', encoding='utf-8').readlines()`
			`data = {'question': [], 'answer': []}`
			`for lines in src_lines:`
			`question, answer = lines.strip().split('\t')`
			`data['question'].append(question)`
			`data['answer'].append(answer)`

			`dataset = Dataset.from_dict({`
			`'question': data['question'],`
			`'answer': data['answer']`
			`})`
			`return dataset`


			`LANG_TO_ANSWER_PREFIX = {`
			`'en': 'Answer',`
			`'bn': 'উত্তর',`
			`'de': 'Antwort',`
			`'es': 'Respuesta',`
			`'fr': 'Réponse',`
			`'ja': '答え',`
			`'ru': 'Ответ',`
			`'sw': 'Jibu',`
			`'te': 'సమాధానం',`
			`'th': 'คำตอบ',`
			`'zh': '答案',`
			`}`


			`def mgsm_postprocess(text: str, lang: str) -> str:`
			`answer_prefix = LANG_TO_ANSWER_PREFIX[lang]`
			`if answer_prefix not in text:`
			`return ''`
			`answer_text = text.split(answer_prefix)[-1].strip()`
			`numbers = re.findall(r'\d+\.?\d*', answer_text.replace(',', ''))`
			`return numbers[-1].rstrip('.') if numbers else ''`


			`class MGSM_Evaluator(BaseEvaluator):`

			`def score(self, predictions, references):`
			`assert len(predictions) == len(references)`

			`num_correct, total = 0, 0`
			`details = {}`
			`for index, (references_answer, predictions_answer) in enumerate(`
			`zip(references, predictions)):`
			`if references_answer == predictions_answer:`
			`is_correct = True`
			`else:`
			`is_correct = False`

			`num_correct += is_correct`
			`total += 1`
			`details[str(index)] = {`
			`'references': references_answer,`
			`'predictions': predictions_answer,`
			`'correct': is_correct,`
			`}`

			`accuracy = num_correct / total * 100`
			`final_result = {'accuracy': accuracy, 'details': details}`
			`return final_result`