OpenCompass/configs/datasets/tydiqa/tydiqa_gen_978d2a.py

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import TydiQADataset, TydiQAEvaluator

# Every config in this file targets the TyDiQA GoldP task.
# Reader settings: the passage and question columns are the inputs,
# and `answer` is the output column.
tydiqa_reader_cfg = {
    'input_columns': ['passage_text', 'question_text'],
    'output_column': 'answer',
}

# Languages covered by this config; each one must have an entry in
# `prefixs_prompt` below.
langs = [
    'arabic',
    'bengali',
    'english',
    'finnish',
    'indonesian',
    'japanese',
    'korean',
    'russian',
    'swahili',
    'telugu',
    'thai',
]

# Localized prompt pieces per language, always in this order:
#   (instruction sentence, passage label, question label, answer label)
prefixs_prompt = {
    'english': (
        'Answer the following question based on the information in the given passage.',
        'Passage:',
        'Question:',
        'Answer:',
    ),
    'arabic': (
        'أجب على السؤال التالي بناءً على المعلومات في المقطع المعطى.',
        'المقطع:',
        'السؤال:',
        'الإجابة:',
    ),
    'bengali': (
        'প্রদত্ত অধ্যায়ের তথ্যের উপর ভিত্তি করে নিম্নলিখিত প্রশ্নের উত্তর দিন।',
        'অধ্যায়:',
        'প্রশ্ন:',
        'উত্তর:',
    ),
    'finnish': (
        'Vastaa seuraavaan kysymykseen annetun kappaleen tiedon perusteella.',
        'Kappale:',
        'Kysymys:',
        'Vastaus:',
    ),
    'indonesian': (
        'Jawab pertanyaan berikut berdasarkan informasi di bagian yang diberikan.',
        'Bagian:',
        'Pertanyaan:',
        'Jawaban:',
    ),
    'korean': (
        '주어진 문단의 정보에 기반하여 다음 질문에 답하십시오.',
        '문단:',
        '질문:',
        '답변:',
    ),
    'japanese': (
        '文脈に基づいて質問に答えてください。',
        'ぶんしょう:',
        'しつもん:',
        'かいとう:',
    ),
    'russian': (
        'Ответьте на следующий вопрос на основе информации в данном отрывке.',
        'Отрывок:',
        'Вопрос:',
        'Ответ:',
    ),
    'swahili': (
        'Jibu swali lifuatalo kulingana na habari kwenye kifungu kilichotolewa.',
        'Kifungu:',
        'Swali:',
        'Jibu:',
    ),
    'telugu': (
        'ఇచ్చిన పేరాలోని సమాచారం ఆధారంగా కింది ప్రశ్నకు సమాధానం ఇవ్వండి.',
        'పేరా:',
        'ప్రశ్న:',
        'సమాధానం:',
    ),
    'thai': (
        'ตอบคำถามต่อไปนี้โดยอิงตามข้อมูลในตอนข้อความที่กำหนด:',
        'ตอนข้อความ:',
        'คำถาม:',
        'คำตอบ:',
    ),
}

# Build one TyDiQA GoldP dataset config per language and collect them in
# `tydiqa_datasets`. Only the prompt wording and the `lang` / `abbr` fields
# differ between entries; reader and eval settings are the same for all.
tydiqa_datasets = []
for _lang in langs:
    # (instruction, passage label, question label, answer label)
    _hint = prefixs_prompt[_lang]
    tydiqa_infer_cfg = dict(
        prompt_template=dict(
            type=PromptTemplate,
            # Layout: instruction, blank line, the '</E>' in-context-example
            # slot, then the passage, question and answer fields. Doubled
            # braces keep {passage_text}/{question_text}/{answer} as literal
            # placeholders for the template to fill in later.
            template=f"{_hint[0]}\n\n</E>{_hint[1]}{{passage_text}}\n{_hint[2]} {{question_text}}\n{_hint[3]} {{answer}}",
            ice_token='</E>',
        ),
        retriever=dict(type=ZeroRetriever),
        # `max_out_len` must sit inside the inferencer dict. It used to be a
        # sibling key of `inferencer` (i.e. a stray top-level entry of
        # infer_cfg), where it is not passed to GenInferencer.
        inferencer=dict(type=GenInferencer, max_out_len=50),
    )

    tydiqa_eval_cfg = dict(
        evaluator=dict(type=TydiQAEvaluator),
        ds_split='validation',
        ds_column='answer',
    )

    tydiqa_datasets.append(
        dict(
            abbr=f'tydiqa-goldp_{_lang}',
            type=TydiQADataset,
            path='./data/tydiqa',
            lang=_lang,
            reader_cfg=tydiqa_reader_cfg,
            infer_cfg=tydiqa_infer_cfg,
            eval_cfg=tydiqa_eval_cfg,
        )
    )
[Feature] Add tydiqa-goldp (#75) Co-authored-by: liuhongwei <liuhongwei@pjlab.org.cn> 2023-07-18 14:54:35 +08:00			`from opencompass.openicl.icl_prompt_template import PromptTemplate`
			`from opencompass.openicl.icl_retriever import ZeroRetriever`
			`from opencompass.openicl.icl_inferencer import GenInferencer`
			`from opencompass.datasets import TydiQADataset, TydiQAEvaluator`

			`# All configs are for TydiQA Goldp task`
			`tydiqa_reader_cfg = dict(`
			`input_columns=["passage_text", "question_text"],`
[Feature] Use dataset in local path (#570) * update commonsenseqa * update drop * update flores_first100 * update gsm8k * update humaneval * update lambda * update obqa * update piqa * update race * update siqa * update story_cloze * update strategyqa * update tydiqa * update winogrande * update doc * update hellaswag * fix obqa * update collections * update .zip name 2023-11-13 13:00:37 +08:00			`output_column="answer"`
			`)`
[Feature] Add tydiqa-goldp (#75) Co-authored-by: liuhongwei <liuhongwei@pjlab.org.cn> 2023-07-18 14:54:35 +08:00
			`langs = ['arabic', 'bengali', 'english', 'finnish', 'indonesian', 'japanese', 'korean', 'russian', 'swahili', 'telugu', 'thai']`

			`prefixs_prompt = {`
			`"english": ("Answer the following question based on the information in the given passage.", "Passage:", "Question:", "Answer:"),`
			`"arabic": ("أجب على السؤال التالي بناءً على المعلومات في المقطع المعطى.", "المقطع:", "السؤال:", "الإجابة:"),`
			`"bengali": ("প্রদত্ত অধ্যায়ের তথ্যের উপর ভিত্তি করে নিম্নলিখিত প্রশ্নের উত্তর দিন।", "অধ্যায়:", "প্রশ্ন:", "উত্তর:"),`
			`"finnish": ("Vastaa seuraavaan kysymykseen annetun kappaleen tiedon perusteella.", "Kappale:", "Kysymys:", "Vastaus:"),`
			`"indonesian": ("Jawab pertanyaan berikut berdasarkan informasi di bagian yang diberikan.", "Bagian:", "Pertanyaan:", "Jawaban:"),`
			`"korean": ("주어진 문단의 정보에 기반하여 다음 질문에 답하십시오.", "문단:", "질문:", "답변:"),`
			`"japanese":("文脈に基づいて質問に答えてください。","ぶんしょう:","しつもん:", "かいとう:"),`
			`"russian": ("Ответьте на следующий вопрос на основе информации в данном отрывке.", "Отрывок:", "Вопрос:", "Ответ:"),`
			`"swahili": ("Jibu swali lifuatalo kulingana na habari kwenye kifungu kilichotolewa.", "Kifungu:", "Swali:", "Jibu:"),`
			`"telugu": ("ఇచ్చిన పేరాలోని సమాచారం ఆధారంగా కింది ప్రశ్నకు సమాధానం ఇవ్వండి.", "పేరా:", "ప్రశ్న:", "సమాధానం:"),`
			`"thai":("ตอบคำถามต่อไปนี้โดยอิงตามข้อมูลในตอนข้อความที่กำหนด:", "ตอนข้อความ:", "คำถาม:", "คำตอบ:")`
			`}`

			`tydiqa_datasets = []`
			`for _lang in langs:`
			`_hint = prefixs_prompt[_lang]`
			`tydiqa_infer_cfg = dict(`
			`prompt_template=dict(`
			`type=PromptTemplate,`
			`template=f"{_hint[0]}\n\n</E>{_hint[1]}{{passage_text}}\n{_hint[2]} {{question_text}}\n{_hint[3]} {{answer}}" ,`
[Feature] Use dataset in local path (#570) * update commonsenseqa * update drop * update flores_first100 * update gsm8k * update humaneval * update lambda * update obqa * update piqa * update race * update siqa * update story_cloze * update strategyqa * update tydiqa * update winogrande * update doc * update hellaswag * fix obqa * update collections * update .zip name 2023-11-13 13:00:37 +08:00			`ice_token='</E>'`
			`),`
[Feature] Add tydiqa-goldp (#75) Co-authored-by: liuhongwei <liuhongwei@pjlab.org.cn> 2023-07-18 14:54:35 +08:00			`retriever=dict(type=ZeroRetriever),`
[Feature] Use dataset in local path (#570) * update commonsenseqa * update drop * update flores_first100 * update gsm8k * update humaneval * update lambda * update obqa * update piqa * update race * update siqa * update story_cloze * update strategyqa * update tydiqa * update winogrande * update doc * update hellaswag * fix obqa * update collections * update .zip name 2023-11-13 13:00:37 +08:00			`inferencer=dict(type=GenInferencer), max_out_len=50`
			`)`

			`tydiqa_eval_cfg = dict(`
			`evaluator=dict(type=TydiQAEvaluator),`
			`ds_split='validation',`
			`ds_column='answer',`
			`)`
[Feature] Add tydiqa-goldp (#75) Co-authored-by: liuhongwei <liuhongwei@pjlab.org.cn> 2023-07-18 14:54:35 +08:00
			`tydiqa_datasets.append(`
update word spell (#594) 2023-11-15 15:23:58 +08:00			`dict(abbr=f'tydiqa-goldp_{_lang}',`
[Feature] Use dataset in local path (#570) * update commonsenseqa * update drop * update flores_first100 * update gsm8k * update humaneval * update lambda * update obqa * update piqa * update race * update siqa * update story_cloze * update strategyqa * update tydiqa * update winogrande * update doc * update hellaswag * fix obqa * update collections * update .zip name 2023-11-13 13:00:37 +08:00			`type=TydiQADataset,`
			`path='./data/tydiqa',`
			`lang=_lang,`
			`reader_cfg=tydiqa_reader_cfg,`
			`infer_cfg=tydiqa_infer_cfg,`
			`eval_cfg=tydiqa_eval_cfg`
			`)`
			`)`