OpenCompass/configs/datasets/tydiqa/tydiqa_gen_978d2a.py

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import TydiQADataset, TydiQAEvaluator
from os import environ

# All configs are for TydiQA Goldp task
# Reader settings: the two columns fed into the prompt and the column that
# holds the reference answer.
tydiqa_reader_cfg = {
    'input_columns': ['passage_text', 'question_text'],
    'output_column': 'answer',
}

# Language subsets to generate a dataset config for, in evaluation order.
langs = [
    'arabic',
    'bengali',
    'english',
    'finnish',
    'indonesian',
    'japanese',
    'korean',
    'russian',
    'swahili',
    'telugu',
    'thai',
]

# Localized prompt pieces, keyed by language name.
# Each value is a 4-tuple: (instruction, passage label, question label,
# answer label).
prefixs_prompt = {
    'english': (
        'Answer the following question based on the information in the given passage.',
        'Passage:',
        'Question:',
        'Answer:',
    ),
    'arabic': (
        'أجب على السؤال التالي بناءً على المعلومات في المقطع المعطى.',
        'المقطع:',
        'السؤال:',
        'الإجابة:',
    ),
    'bengali': (
        'প্রদত্ত অধ্যায়ের তথ্যের উপর ভিত্তি করে নিম্নলিখিত প্রশ্নের উত্তর দিন।',
        'অধ্যায়:',
        'প্রশ্ন:',
        'উত্তর:',
    ),
    'finnish': (
        'Vastaa seuraavaan kysymykseen annetun kappaleen tiedon perusteella.',
        'Kappale:',
        'Kysymys:',
        'Vastaus:',
    ),
    'indonesian': (
        'Jawab pertanyaan berikut berdasarkan informasi di bagian yang diberikan.',
        'Bagian:',
        'Pertanyaan:',
        'Jawaban:',
    ),
    'korean': (
        '주어진 문단의 정보에 기반하여 다음 질문에 답하십시오.',
        '문단:',
        '질문:',
        '답변:',
    ),
    'japanese': (
        '文脈に基づいて質問に答えてください。',
        'ぶんしょう:',
        'しつもん:',
        'かいとう:',
    ),
    'russian': (
        'Ответьте на следующий вопрос на основе информации в данном отрывке.',
        'Отрывок:',
        'Вопрос:',
        'Ответ:',
    ),
    'swahili': (
        'Jibu swali lifuatalo kulingana na habari kwenye kifungu kilichotolewa.',
        'Kifungu:',
        'Swali:',
        'Jibu:',
    ),
    'telugu': (
        'ఇచ్చిన పేరాలోని సమాచారం ఆధారంగా కింది ప్రశ్నకు సమాధానం ఇవ్వండి.',
        'పేరా:',
        'ప్రశ్న:',
        'సమాధానం:',
    ),
    'thai': (
        'ตอบคำถามต่อไปนี้โดยอิงตามข้อมูลในตอนข้อความที่กำหนด:',
        'ตอนข้อความ:',
        'คำถาม:',
        'คำตอบ:',
    ),
}

# Build one dataset config per language. Every entry shares the reader config
# and differs only in its language-specific prompt and abbreviation.
tydiqa_datasets = []
for _lang in langs:
    # Skip japanese due to filter rules of Modelscope
    if environ.get('DATASET_SOURCE') == 'Modelscope' and _lang == 'japanese':
        continue

    _hint = prefixs_prompt[_lang]
    tydiqa_infer_cfg = dict(
        prompt_template=dict(
            type=PromptTemplate,
            # Doubled braces survive the f-string as literal {passage_text},
            # {question_text} and {answer} placeholders in the template.
            template=f'{_hint[0]}\n\n</E>{_hint[1]}{{passage_text}}\n{_hint[2]} {{question_text}}\n{_hint[3]} {{answer}}',
            ice_token='</E>',
        ),
        retriever=dict(type=ZeroRetriever),
        # max_out_len belongs inside the inferencer dict so it is passed to
        # GenInferencer; as a sibling key of infer_cfg it was not part of the
        # inferencer's arguments.
        inferencer=dict(type=GenInferencer, max_out_len=50),
    )

    tydiqa_eval_cfg = dict(
        evaluator=dict(type=TydiQAEvaluator),
        ds_split='validation',
        ds_column='answer',
    )

    tydiqa_datasets.append(
        dict(
            abbr=f'tydiqa-goldp_{_lang}',
            type=TydiQADataset,
            path='opencompass/tydiqa',
            lang=_lang,
            reader_cfg=tydiqa_reader_cfg,
            infer_cfg=tydiqa_infer_cfg,
            eval_cfg=tydiqa_eval_cfg,
        )
    )
[Feature] Add tydiqa-goldp (#75) Co-authored-by: liuhongwei <liuhongwei@pjlab.org.cn> 2023-07-18 14:54:35 +08:00			`from opencompass.openicl.icl_prompt_template import PromptTemplate`
			`from opencompass.openicl.icl_retriever import ZeroRetriever`
			`from opencompass.openicl.icl_inferencer import GenInferencer`
			`from opencompass.datasets import TydiQADataset, TydiQAEvaluator`
[Feature] Support ModelScope datasets (#1289) * add ceval, gsm8k modelscope surpport * update race, mmlu, arc, cmmlu, commonsenseqa, humaneval and unittest * update bbh, flores, obqa, siqa, storycloze, summedits, winogrande, xsum datasets * format file * format file * update dataset format * support ms_dataset * udpate dataset for modelscope support * merge myl_dev and update test_ms_dataset * udpate dataset for modelscope support * update readme * update eval_api_zhipu_v2 * remove unused code * add get_data_path function * update readme * remove tydiqa japanese subset * add ceval, gsm8k modelscope surpport * update race, mmlu, arc, cmmlu, commonsenseqa, humaneval and unittest * update bbh, flores, obqa, siqa, storycloze, summedits, winogrande, xsum datasets * format file * format file * update dataset format * support ms_dataset * udpate dataset for modelscope support * merge myl_dev and update test_ms_dataset * update readme * udpate dataset for modelscope support * update eval_api_zhipu_v2 * remove unused code * add get_data_path function * remove tydiqa japanese subset * update util * remove .DS_Store * fix md format * move util into package * update docs/get_started.md * restore eval_api_zhipu_v2.py, add environment setting * Update dataset * Update * Update * Update * Update --------- Co-authored-by: Yun lin <yunlin@U-Q9X2K4QV-1904.local> Co-authored-by: Yunnglin <mao.looper@qq.com> Co-authored-by: Yun lin <yunlin@laptop.local> Co-authored-by: Yunnglin <maoyl@smail.nju.edu.cn> Co-authored-by: zhangsongyang <zhangsongyang@pjlab.org.cn> 2024-07-29 13:48:32 +08:00			`from os import environ`
[Feature] Add tydiqa-goldp (#75) Co-authored-by: liuhongwei <liuhongwei@pjlab.org.cn> 2023-07-18 14:54:35 +08:00
			`# All configs are for TydiQA Goldp task`
			`tydiqa_reader_cfg = dict(`
[Format] Add config lints (#892) 2024-05-14 15:35:58 +08:00			`input_columns=['passage_text', 'question_text'],`
			`output_column='answer'`
[Feature] Use dataset in local path (#570) * update commonsenseqa * update drop * update flores_first100 * update gsm8k * update humaneval * update lambda * update obqa * update piqa * update race * update siqa * update story_cloze * update strategyqa * update tydiqa * update winogrande * update doc * update hellaswag * fix obqa * update collections * update .zip name 2023-11-13 13:00:37 +08:00			`)`
[Feature] Add tydiqa-goldp (#75) Co-authored-by: liuhongwei <liuhongwei@pjlab.org.cn> 2023-07-18 14:54:35 +08:00
			`langs = ['arabic', 'bengali', 'english', 'finnish', 'indonesian', 'japanese', 'korean', 'russian', 'swahili', 'telugu', 'thai']`

			`prefixs_prompt = {`
[Format] Add config lints (#892) 2024-05-14 15:35:58 +08:00			`'english': ('Answer the following question based on the information in the given passage.', 'Passage:', 'Question:', 'Answer:'),`
			`'arabic': ('أجب على السؤال التالي بناءً على المعلومات في المقطع المعطى.', 'المقطع:', 'السؤال:', 'الإجابة:'),`
			`'bengali': ('প্রদত্ত অধ্যায়ের তথ্যের উপর ভিত্তি করে নিম্নলিখিত প্রশ্নের উত্তর দিন।', 'অধ্যায়:', 'প্রশ্ন:', 'উত্তর:'),`
			`'finnish': ('Vastaa seuraavaan kysymykseen annetun kappaleen tiedon perusteella.', 'Kappale:', 'Kysymys:', 'Vastaus:'),`
			`'indonesian': ('Jawab pertanyaan berikut berdasarkan informasi di bagian yang diberikan.', 'Bagian:', 'Pertanyaan:', 'Jawaban:'),`
			`'korean': ('주어진 문단의 정보에 기반하여 다음 질문에 답하십시오.', '문단:', '질문:', '답변:'),`
			`'japanese':('文脈に基づいて質問に答えてください。','ぶんしょう:','しつもん:', 'かいとう:'),`
			`'russian': ('Ответьте на следующий вопрос на основе информации в данном отрывке.', 'Отрывок:', 'Вопрос:', 'Ответ:'),`
			`'swahili': ('Jibu swali lifuatalo kulingana na habari kwenye kifungu kilichotolewa.', 'Kifungu:', 'Swali:', 'Jibu:'),`
			`'telugu': ('ఇచ్చిన పేరాలోని సమాచారం ఆధారంగా కింది ప్రశ్నకు సమాధానం ఇవ్వండి.', 'పేరా:', 'ప్రశ్న:', 'సమాధానం:'),`
			`'thai':('ตอบคำถามต่อไปนี้โดยอิงตามข้อมูลในตอนข้อความที่กำหนด:', 'ตอนข้อความ:', 'คำถาม:', 'คำตอบ:')`
[Feature] Add tydiqa-goldp (#75) Co-authored-by: liuhongwei <liuhongwei@pjlab.org.cn> 2023-07-18 14:54:35 +08:00			`}`

			`tydiqa_datasets = []`
			`for _lang in langs:`
			`_hint = prefixs_prompt[_lang]`
			`tydiqa_infer_cfg = dict(`
			`prompt_template=dict(`
			`type=PromptTemplate,`
[Format] Add config lints (#892) 2024-05-14 15:35:58 +08:00			`template=f'{_hint[0]}\n\n</E>{_hint[1]}{{passage_text}}\n{_hint[2]} {{question_text}}\n{_hint[3]} {{answer}}' ,`
[Feature] Use dataset in local path (#570) * update commonsenseqa * update drop * update flores_first100 * update gsm8k * update humaneval * update lambda * update obqa * update piqa * update race * update siqa * update story_cloze * update strategyqa * update tydiqa * update winogrande * update doc * update hellaswag * fix obqa * update collections * update .zip name 2023-11-13 13:00:37 +08:00			`ice_token='</E>'`
			`),`
[Feature] Add tydiqa-goldp (#75) Co-authored-by: liuhongwei <liuhongwei@pjlab.org.cn> 2023-07-18 14:54:35 +08:00			`retriever=dict(type=ZeroRetriever),`
[Feature] Use dataset in local path (#570) * update commonsenseqa * update drop * update flores_first100 * update gsm8k * update humaneval * update lambda * update obqa * update piqa * update race * update siqa * update story_cloze * update strategyqa * update tydiqa * update winogrande * update doc * update hellaswag * fix obqa * update collections * update .zip name 2023-11-13 13:00:37 +08:00			`inferencer=dict(type=GenInferencer), max_out_len=50`
			`)`

			`tydiqa_eval_cfg = dict(`
			`evaluator=dict(type=TydiQAEvaluator),`
			`ds_split='validation',`
			`ds_column='answer',`
			`)`
[Feature] Add tydiqa-goldp (#75) Co-authored-by: liuhongwei <liuhongwei@pjlab.org.cn> 2023-07-18 14:54:35 +08:00
[Feature] Support ModelScope datasets (#1289) * add ceval, gsm8k modelscope surpport * update race, mmlu, arc, cmmlu, commonsenseqa, humaneval and unittest * update bbh, flores, obqa, siqa, storycloze, summedits, winogrande, xsum datasets * format file * format file * update dataset format * support ms_dataset * udpate dataset for modelscope support * merge myl_dev and update test_ms_dataset * udpate dataset for modelscope support * update readme * update eval_api_zhipu_v2 * remove unused code * add get_data_path function * update readme * remove tydiqa japanese subset * add ceval, gsm8k modelscope surpport * update race, mmlu, arc, cmmlu, commonsenseqa, humaneval and unittest * update bbh, flores, obqa, siqa, storycloze, summedits, winogrande, xsum datasets * format file * format file * update dataset format * support ms_dataset * udpate dataset for modelscope support * merge myl_dev and update test_ms_dataset * update readme * udpate dataset for modelscope support * update eval_api_zhipu_v2 * remove unused code * add get_data_path function * remove tydiqa japanese subset * update util * remove .DS_Store * fix md format * move util into package * update docs/get_started.md * restore eval_api_zhipu_v2.py, add environment setting * Update dataset * Update * Update * Update * Update --------- Co-authored-by: Yun lin <yunlin@U-Q9X2K4QV-1904.local> Co-authored-by: Yunnglin <mao.looper@qq.com> Co-authored-by: Yun lin <yunlin@laptop.local> Co-authored-by: Yunnglin <maoyl@smail.nju.edu.cn> Co-authored-by: zhangsongyang <zhangsongyang@pjlab.org.cn> 2024-07-29 13:48:32 +08:00			`# Skip japanese due to filter rules of Modelscope`
			`if environ.get('DATASET_SOURCE') == 'Modelscope' and _lang == 'japanese':`
			`continue`

[Feature] Add tydiqa-goldp (#75) Co-authored-by: liuhongwei <liuhongwei@pjlab.org.cn> 2023-07-18 14:54:35 +08:00			`tydiqa_datasets.append(`
update word spell (#594) 2023-11-15 15:23:58 +08:00			`dict(abbr=f'tydiqa-goldp_{_lang}',`
[Feature] Use dataset in local path (#570) * update commonsenseqa * update drop * update flores_first100 * update gsm8k * update humaneval * update lambda * update obqa * update piqa * update race * update siqa * update story_cloze * update strategyqa * update tydiqa * update winogrande * update doc * update hellaswag * fix obqa * update collections * update .zip name 2023-11-13 13:00:37 +08:00			`type=TydiQADataset,`
[Feature] Support ModelScope datasets (#1289) * add ceval, gsm8k modelscope surpport * update race, mmlu, arc, cmmlu, commonsenseqa, humaneval and unittest * update bbh, flores, obqa, siqa, storycloze, summedits, winogrande, xsum datasets * format file * format file * update dataset format * support ms_dataset * udpate dataset for modelscope support * merge myl_dev and update test_ms_dataset * udpate dataset for modelscope support * update readme * update eval_api_zhipu_v2 * remove unused code * add get_data_path function * update readme * remove tydiqa japanese subset * add ceval, gsm8k modelscope surpport * update race, mmlu, arc, cmmlu, commonsenseqa, humaneval and unittest * update bbh, flores, obqa, siqa, storycloze, summedits, winogrande, xsum datasets * format file * format file * update dataset format * support ms_dataset * udpate dataset for modelscope support * merge myl_dev and update test_ms_dataset * update readme * udpate dataset for modelscope support * update eval_api_zhipu_v2 * remove unused code * add get_data_path function * remove tydiqa japanese subset * update util * remove .DS_Store * fix md format * move util into package * update docs/get_started.md * restore eval_api_zhipu_v2.py, add environment setting * Update dataset * Update * Update * Update * Update --------- Co-authored-by: Yun lin <yunlin@U-Q9X2K4QV-1904.local> Co-authored-by: Yunnglin <mao.looper@qq.com> Co-authored-by: Yun lin <yunlin@laptop.local> Co-authored-by: Yunnglin <maoyl@smail.nju.edu.cn> Co-authored-by: zhangsongyang <zhangsongyang@pjlab.org.cn> 2024-07-29 13:48:32 +08:00			`path='opencompass/tydiqa',`
[Feature] Use dataset in local path (#570) * update commonsenseqa * update drop * update flores_first100 * update gsm8k * update humaneval * update lambda * update obqa * update piqa * update race * update siqa * update story_cloze * update strategyqa * update tydiqa * update winogrande * update doc * update hellaswag * fix obqa * update collections * update .zip name 2023-11-13 13:00:37 +08:00			`lang=_lang,`
			`reader_cfg=tydiqa_reader_cfg,`
			`infer_cfg=tydiqa_infer_cfg,`
			`eval_cfg=tydiqa_eval_cfg`
			`)`
			`)`