mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
57 lines
3.3 KiB
Python
57 lines
3.3 KiB
Python
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
|
from opencompass.openicl.icl_retriever import ZeroRetriever
|
|
from opencompass.openicl.icl_inferencer import GenInferencer
|
|
from opencompass.datasets import TydiQADataset, TydiQAEvaluator
|
|
|
|
# All configs are for TydiQA Goldp task
|
|
tydiqa_reader_cfg = dict(
|
|
input_columns=["passage_text", "question_text"],
|
|
output_column="answer"
|
|
)
|
|
|
|
langs = ['arabic', 'bengali', 'english', 'finnish', 'indonesian', 'japanese', 'korean', 'russian', 'swahili', 'telugu', 'thai']
|
|
|
|
prefixs_prompt = {
|
|
"english": ("Answer the following question based on the information in the given passage.", "Passage:", "Question:", "Answer:"),
|
|
"arabic": ("أجب على السؤال التالي بناءً على المعلومات في المقطع المعطى.", "المقطع:", "السؤال:", "الإجابة:"),
|
|
"bengali": ("প্রদত্ত অধ্যায়ের তথ্যের উপর ভিত্তি করে নিম্নলিখিত প্রশ্নের উত্তর দিন।", "অধ্যায়:", "প্রশ্ন:", "উত্তর:"),
|
|
"finnish": ("Vastaa seuraavaan kysymykseen annetun kappaleen tiedon perusteella.", "Kappale:", "Kysymys:", "Vastaus:"),
|
|
"indonesian": ("Jawab pertanyaan berikut berdasarkan informasi di bagian yang diberikan.", "Bagian:", "Pertanyaan:", "Jawaban:"),
|
|
"korean": ("주어진 문단의 정보에 기반하여 다음 질문에 답하십시오.", "문단:", "질문:", "답변:"),
|
|
"japanese":("文脈に基づいて質問に答えてください。","ぶんしょう:","しつもん:", "かいとう:"),
|
|
"russian": ("Ответьте на следующий вопрос на основе информации в данном отрывке.", "Отрывок:", "Вопрос:", "Ответ:"),
|
|
"swahili": ("Jibu swali lifuatalo kulingana na habari kwenye kifungu kilichotolewa.", "Kifungu:", "Swali:", "Jibu:"),
|
|
"telugu": ("ఇచ్చిన పేరాలోని సమాచారం ఆధారంగా కింది ప్రశ్నకు సమాధానం ఇవ్వండి.", "పేరా:", "ప్రశ్న:", "సమాధానం:"),
|
|
"thai":("ตอบคำถามต่อไปนี้โดยอิงตามข้อมูลในตอนข้อความที่กำหนด:", "ตอนข้อความ:", "คำถาม:", "คำตอบ:")
|
|
}
|
|
|
|
tydiqa_datasets = []
|
|
for _lang in langs:
|
|
_hint = prefixs_prompt[_lang]
|
|
tydiqa_infer_cfg = dict(
|
|
prompt_template=dict(
|
|
type=PromptTemplate,
|
|
template=f"{_hint[0]}\n\n</E>{_hint[1]}{{passage_text}}\n{_hint[2]} {{question_text}}\n{_hint[3]} {{answer}}" ,
|
|
ice_token='</E>'
|
|
),
|
|
retriever=dict(type=ZeroRetriever),
|
|
inferencer=dict(type=GenInferencer), max_out_len=50
|
|
)
|
|
|
|
tydiqa_eval_cfg = dict(
|
|
evaluator=dict(type=TydiQAEvaluator),
|
|
ds_split='validation',
|
|
ds_column='answer',
|
|
)
|
|
|
|
tydiqa_datasets.append(
|
|
dict(abbr=f'tydiqa-goldp_{_lang}',
|
|
type=TydiQADataset,
|
|
path='./data/tydiqa',
|
|
lang=_lang,
|
|
reader_cfg=tydiqa_reader_cfg,
|
|
infer_cfg=tydiqa_infer_cfg,
|
|
eval_cfg=tydiqa_eval_cfg
|
|
)
|
|
)
|