from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccwithDetailsEvaluator
from opencompass.datasets import MMLUDataset

# None of the mmlu dataset in huggingface is correctly parsed, so we use our own dataset reader
# Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar
# Reader configuration: which dataset columns feed the prompt templates and
# which column holds the gold answer. The 'dev' split supplies the few-shot
# in-context examples retrieved by FixKRetriever below.
mmlu_reader_cfg = dict(
    input_columns=['input', 'A', 'B', 'C', 'D'],
    output_column='target',
    train_split='dev')
# The 57 MMLU subject (task) names; one dataset config is generated per entry.
mmlu_all_sets = [
    'college_biology',
    'college_chemistry',
    'college_computer_science',
    'college_mathematics',
    'college_physics',
    'electrical_engineering',
    'astronomy',
    'anatomy',
    'abstract_algebra',
    'machine_learning',
    'clinical_knowledge',
    'global_facts',
    'management',
    'nutrition',
    'marketing',
    'professional_accounting',
    'high_school_geography',
    'international_law',
    'moral_scenarios',
    'computer_security',
    'high_school_microeconomics',
    'professional_law',
    'medical_genetics',
    'professional_psychology',
    'jurisprudence',
    'world_religions',
    'philosophy',
    'virology',
    'high_school_chemistry',
    'public_relations',
    'high_school_macroeconomics',
    'human_sexuality',
    'elementary_mathematics',
    'high_school_physics',
    'high_school_computer_science',
    'high_school_european_history',
    'business_ethics',
    'moral_disputes',
    'high_school_statistics',
    'miscellaneous',
    'formal_logic',
    'high_school_government_and_politics',
    'prehistory',
    'security_studies',
    'high_school_biology',
    'logical_fallacies',
    'high_school_world_history',
    'professional_medicine',
    'high_school_mathematics',
    'college_medicine',
    'high_school_us_history',
    'sociology',
    'econometrics',
    'high_school_psychology',
    'human_aging',
    'us_foreign_policy',
    'conceptual_physics',
]
# Build one dataset config per MMLU subject. Each uses a 5-shot PPL setup:
# the retriever pins the first five dev-split examples as in-context shots,
# and the PPL inferencer scores each of the four answer-letter completions.
mmlu_datasets = []
for _name in mmlu_all_sets:
    # Subject-specific instruction, e.g.
    # "The following are multiple choice questions (with answers) about college biology."
    _hint = f'The following are multiple choice questions (with answers) about {_name.replace("_", " ")}.\n\n'
    # Shared question layout; {input}/{A}-{D} are filled from the reader columns.
    question_overall = '{input}\nA. {A}\nB. {B}\nC. {C}\nD. {D}'
    mmlu_infer_cfg = dict(
        # Template for the in-context examples: question plus the gold letter.
        ice_template=dict(
            type=PromptTemplate,
            template={opt: f'{question_overall}\nAnswer: {opt}\n' for opt in ['A', 'B', 'C', 'D']},
        ),
        # Template for the scored prompt: hint, the 5 shots (</E>), then the
        # candidate answer letter whose perplexity is measured.
        prompt_template=dict(
            type=PromptTemplate,
            template={opt: f'{_hint}</E>{question_overall}\nAnswer: {opt}' for opt in ['A', 'B', 'C', 'D']},
            ice_token='</E>',
        ),
        # Always use dev examples 0-4 as the few-shot context.
        retriever=dict(type=FixKRetriever, fix_id_list=[0, 1, 2, 3, 4]),
        inferencer=dict(type=PPLInferencer),
    )

    mmlu_eval_cfg = dict(evaluator=dict(type=AccwithDetailsEvaluator), )

    mmlu_datasets.append(
        dict(
            abbr=f'lukaemon_mmlu_{_name}',
            type=MMLUDataset,
            path='opencompass/mmlu',
            name=_name,
            reader_cfg=mmlu_reader_cfg,
            infer_cfg=mmlu_infer_cfg,
            eval_cfg=mmlu_eval_cfg,
        ))

# Keep the module namespace clean: these are loop temporaries, not config.
del _name, _hint