2023-07-18 14:54:35 +08:00
|
|
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
|
|
|
from opencompass.openicl.icl_retriever import ZeroRetriever
|
|
|
|
from opencompass.openicl.icl_inferencer import GenInferencer
|
|
|
|
from opencompass.datasets import TydiQADataset, TydiQAEvaluator
|
[Feature] Support ModelScope datasets (#1289)
* add ceval, gsm8k modelscope surpport
* update race, mmlu, arc, cmmlu, commonsenseqa, humaneval and unittest
* update bbh, flores, obqa, siqa, storycloze, summedits, winogrande, xsum datasets
* format file
* format file
* update dataset format
* support ms_dataset
* udpate dataset for modelscope support
* merge myl_dev and update test_ms_dataset
* udpate dataset for modelscope support
* update readme
* update eval_api_zhipu_v2
* remove unused code
* add get_data_path function
* update readme
* remove tydiqa japanese subset
* add ceval, gsm8k modelscope surpport
* update race, mmlu, arc, cmmlu, commonsenseqa, humaneval and unittest
* update bbh, flores, obqa, siqa, storycloze, summedits, winogrande, xsum datasets
* format file
* format file
* update dataset format
* support ms_dataset
* udpate dataset for modelscope support
* merge myl_dev and update test_ms_dataset
* update readme
* udpate dataset for modelscope support
* update eval_api_zhipu_v2
* remove unused code
* add get_data_path function
* remove tydiqa japanese subset
* update util
* remove .DS_Store
* fix md format
* move util into package
* update docs/get_started.md
* restore eval_api_zhipu_v2.py, add environment setting
* Update dataset
* Update
* Update
* Update
* Update
---------
Co-authored-by: Yun lin <yunlin@U-Q9X2K4QV-1904.local>
Co-authored-by: Yunnglin <mao.looper@qq.com>
Co-authored-by: Yun lin <yunlin@laptop.local>
Co-authored-by: Yunnglin <maoyl@smail.nju.edu.cn>
Co-authored-by: zhangsongyang <zhangsongyang@pjlab.org.cn>
2024-07-29 13:48:32 +08:00
|
|
|
from os import environ
|
2023-07-18 14:54:35 +08:00
|
|
|
|
|
|
|
# All configs are for TydiQA Goldp task
|
|
|
|
tydiqa_reader_cfg = dict(
|
2024-05-14 15:35:58 +08:00
|
|
|
input_columns=['passage_text', 'question_text'],
|
|
|
|
output_column='answer'
|
2023-11-13 13:00:37 +08:00
|
|
|
)
|
2023-07-18 14:54:35 +08:00
|
|
|
|
|
|
|
langs = ['arabic', 'bengali', 'english', 'finnish', 'indonesian', 'japanese', 'korean', 'russian', 'swahili', 'telugu', 'thai']
|
|
|
|
|
|
|
|
prefixs_prompt = {
|
2024-05-14 15:35:58 +08:00
|
|
|
'english': ('Answer the following question based on the information in the given passage.', 'Passage:', 'Question:', 'Answer:'),
|
|
|
|
'arabic': ('أجب على السؤال التالي بناءً على المعلومات في المقطع المعطى.', 'المقطع:', 'السؤال:', 'الإجابة:'),
|
|
|
|
'bengali': ('প্রদত্ত অধ্যায়ের তথ্যের উপর ভিত্তি করে নিম্নলিখিত প্রশ্নের উত্তর দিন।', 'অধ্যায়:', 'প্রশ্ন:', 'উত্তর:'),
|
|
|
|
'finnish': ('Vastaa seuraavaan kysymykseen annetun kappaleen tiedon perusteella.', 'Kappale:', 'Kysymys:', 'Vastaus:'),
|
|
|
|
'indonesian': ('Jawab pertanyaan berikut berdasarkan informasi di bagian yang diberikan.', 'Bagian:', 'Pertanyaan:', 'Jawaban:'),
|
|
|
|
'korean': ('주어진 문단의 정보에 기반하여 다음 질문에 답하십시오.', '문단:', '질문:', '답변:'),
|
|
|
|
'japanese':('文脈に基づいて質問に答えてください。','ぶんしょう:','しつもん:', 'かいとう:'),
|
|
|
|
'russian': ('Ответьте на следующий вопрос на основе информации в данном отрывке.', 'Отрывок:', 'Вопрос:', 'Ответ:'),
|
|
|
|
'swahili': ('Jibu swali lifuatalo kulingana na habari kwenye kifungu kilichotolewa.', 'Kifungu:', 'Swali:', 'Jibu:'),
|
|
|
|
'telugu': ('ఇచ్చిన పేరాలోని సమాచారం ఆధారంగా కింది ప్రశ్నకు సమాధానం ఇవ్వండి.', 'పేరా:', 'ప్రశ్న:', 'సమాధానం:'),
|
|
|
|
'thai':('ตอบคำถามต่อไปนี้โดยอิงตามข้อมูลในตอนข้อความที่กำหนด:', 'ตอนข้อความ:', 'คำถาม:', 'คำตอบ:')
|
2023-07-18 14:54:35 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
tydiqa_datasets = []
|
|
|
|
for _lang in langs:
|
|
|
|
_hint = prefixs_prompt[_lang]
|
|
|
|
tydiqa_infer_cfg = dict(
|
|
|
|
prompt_template=dict(
|
|
|
|
type=PromptTemplate,
|
2024-05-14 15:35:58 +08:00
|
|
|
template=f'{_hint[0]}\n\n</E>{_hint[1]}{{passage_text}}\n{_hint[2]} {{question_text}}\n{_hint[3]} {{answer}}' ,
|
2023-11-13 13:00:37 +08:00
|
|
|
ice_token='</E>'
|
|
|
|
),
|
2023-07-18 14:54:35 +08:00
|
|
|
retriever=dict(type=ZeroRetriever),
|
2023-11-13 13:00:37 +08:00
|
|
|
inferencer=dict(type=GenInferencer), max_out_len=50
|
|
|
|
)
|
|
|
|
|
|
|
|
tydiqa_eval_cfg = dict(
|
|
|
|
evaluator=dict(type=TydiQAEvaluator),
|
|
|
|
ds_split='validation',
|
|
|
|
ds_column='answer',
|
|
|
|
)
|
2023-07-18 14:54:35 +08:00
|
|
|
|
[Feature] Support ModelScope datasets (#1289)
* add ceval, gsm8k modelscope surpport
* update race, mmlu, arc, cmmlu, commonsenseqa, humaneval and unittest
* update bbh, flores, obqa, siqa, storycloze, summedits, winogrande, xsum datasets
* format file
* format file
* update dataset format
* support ms_dataset
* udpate dataset for modelscope support
* merge myl_dev and update test_ms_dataset
* udpate dataset for modelscope support
* update readme
* update eval_api_zhipu_v2
* remove unused code
* add get_data_path function
* update readme
* remove tydiqa japanese subset
* add ceval, gsm8k modelscope surpport
* update race, mmlu, arc, cmmlu, commonsenseqa, humaneval and unittest
* update bbh, flores, obqa, siqa, storycloze, summedits, winogrande, xsum datasets
* format file
* format file
* update dataset format
* support ms_dataset
* udpate dataset for modelscope support
* merge myl_dev and update test_ms_dataset
* update readme
* udpate dataset for modelscope support
* update eval_api_zhipu_v2
* remove unused code
* add get_data_path function
* remove tydiqa japanese subset
* update util
* remove .DS_Store
* fix md format
* move util into package
* update docs/get_started.md
* restore eval_api_zhipu_v2.py, add environment setting
* Update dataset
* Update
* Update
* Update
* Update
---------
Co-authored-by: Yun lin <yunlin@U-Q9X2K4QV-1904.local>
Co-authored-by: Yunnglin <mao.looper@qq.com>
Co-authored-by: Yun lin <yunlin@laptop.local>
Co-authored-by: Yunnglin <maoyl@smail.nju.edu.cn>
Co-authored-by: zhangsongyang <zhangsongyang@pjlab.org.cn>
2024-07-29 13:48:32 +08:00
|
|
|
# Skip japanese due to filter rules of Modelscope
|
|
|
|
if environ.get('DATASET_SOURCE') == 'Modelscope' and _lang == 'japanese':
|
|
|
|
continue
|
|
|
|
|
2023-07-18 14:54:35 +08:00
|
|
|
tydiqa_datasets.append(
|
2023-11-15 15:23:58 +08:00
|
|
|
dict(abbr=f'tydiqa-goldp_{_lang}',
|
2023-11-13 13:00:37 +08:00
|
|
|
type=TydiQADataset,
|
[Feature] Support ModelScope datasets (#1289)
* add ceval, gsm8k modelscope surpport
* update race, mmlu, arc, cmmlu, commonsenseqa, humaneval and unittest
* update bbh, flores, obqa, siqa, storycloze, summedits, winogrande, xsum datasets
* format file
* format file
* update dataset format
* support ms_dataset
* udpate dataset for modelscope support
* merge myl_dev and update test_ms_dataset
* udpate dataset for modelscope support
* update readme
* update eval_api_zhipu_v2
* remove unused code
* add get_data_path function
* update readme
* remove tydiqa japanese subset
* add ceval, gsm8k modelscope surpport
* update race, mmlu, arc, cmmlu, commonsenseqa, humaneval and unittest
* update bbh, flores, obqa, siqa, storycloze, summedits, winogrande, xsum datasets
* format file
* format file
* update dataset format
* support ms_dataset
* udpate dataset for modelscope support
* merge myl_dev and update test_ms_dataset
* update readme
* udpate dataset for modelscope support
* update eval_api_zhipu_v2
* remove unused code
* add get_data_path function
* remove tydiqa japanese subset
* update util
* remove .DS_Store
* fix md format
* move util into package
* update docs/get_started.md
* restore eval_api_zhipu_v2.py, add environment setting
* Update dataset
* Update
* Update
* Update
* Update
---------
Co-authored-by: Yun lin <yunlin@U-Q9X2K4QV-1904.local>
Co-authored-by: Yunnglin <mao.looper@qq.com>
Co-authored-by: Yun lin <yunlin@laptop.local>
Co-authored-by: Yunnglin <maoyl@smail.nju.edu.cn>
Co-authored-by: zhangsongyang <zhangsongyang@pjlab.org.cn>
2024-07-29 13:48:32 +08:00
|
|
|
path='opencompass/tydiqa',
|
2023-11-13 13:00:37 +08:00
|
|
|
lang=_lang,
|
|
|
|
reader_cfg=tydiqa_reader_cfg,
|
|
|
|
infer_cfg=tydiqa_infer_cfg,
|
|
|
|
eval_cfg=tydiqa_eval_cfg
|
|
|
|
)
|
|
|
|
)
|