from opencompass.openicl.icl_prompt_template import PromptTemplate from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_inferencer import GenInferencer from opencompass.datasets import TydiQADataset, TydiQAEvaluator # All configs are for TydiQA Goldp task tydiqa_reader_cfg = dict( input_columns=["passage_text", "question_text"], output_column="answer" ) langs = ['arabic', 'bengali', 'english', 'finnish', 'indonesian', 'japanese', 'korean', 'russian', 'swahili', 'telugu', 'thai'] prefixs_prompt = { "english": ("Answer the following question based on the information in the given passage.", "Passage:", "Question:", "Answer:"), "arabic": ("أجب على السؤال التالي بناءً على المعلومات في المقطع المعطى.", "المقطع:", "السؤال:", "الإجابة:"), "bengali": ("প্রদত্ত অধ্যায়ের তথ্যের উপর ভিত্তি করে নিম্নলিখিত প্রশ্নের উত্তর দিন।", "অধ্যায়:", "প্রশ্ন:", "উত্তর:"), "finnish": ("Vastaa seuraavaan kysymykseen annetun kappaleen tiedon perusteella.", "Kappale:", "Kysymys:", "Vastaus:"), "indonesian": ("Jawab pertanyaan berikut berdasarkan informasi di bagian yang diberikan.", "Bagian:", "Pertanyaan:", "Jawaban:"), "korean": ("주어진 문단의 정보에 기반하여 다음 질문에 답하십시오.", "문단:", "질문:", "답변:"), "japanese":("文脈に基づいて質問に答えてください。","ぶんしょう:","しつもん:", "かいとう:"), "russian": ("Ответьте на следующий вопрос на основе информации в данном отрывке.", "Отрывок:", "Вопрос:", "Ответ:"), "swahili": ("Jibu swali lifuatalo kulingana na habari kwenye kifungu kilichotolewa.", "Kifungu:", "Swali:", "Jibu:"), "telugu": ("ఇచ్చిన పేరాలోని సమాచారం ఆధారంగా కింది ప్రశ్నకు సమాధానం ఇవ్వండి.", "పేరా:", "ప్రశ్న:", "సమాధానం:"), "thai":("ตอบคำถามต่อไปนี้โดยอิงตามข้อมูลในตอนข้อความที่กำหนด:", "ตอนข้อความ:", "คำถาม:", "คำตอบ:") } tydiqa_datasets = [] for _lang in langs: _hint = prefixs_prompt[_lang] tydiqa_infer_cfg = dict( prompt_template=dict( type=PromptTemplate, template=f"{_hint[0]}\n\n{_hint[1]}{{passage_text}}\n{_hint[2]} {{question_text}}\n{_hint[3]} {{answer}}" , ice_token='' ), retriever=dict(type=ZeroRetriever), inferencer=dict(type=GenInferencer), max_out_len=50 ) tydiqa_eval_cfg = dict( evaluator=dict(type=TydiQAEvaluator), ds_split='validation', ds_column='answer', ) tydiqa_datasets.append( dict(abbr=f'tydiqa-goldp_{_lang}', type=TydiQADataset, path='./data/tydiqa', lang=_lang, reader_cfg=tydiqa_reader_cfg, infer_cfg=tydiqa_infer_cfg, eval_cfg=tydiqa_eval_cfg ) )