from mmengine.config import read_base

# Pull in the per-benchmark summary-group definitions. `read_base` lets these
# relative config modules be merged into this file's namespace, so each
# `*_summary_groups` list becomes a module-level name here.
with read_base():
    from .groups.agieval import agieval_summary_groups
    from .groups.mmlu import mmlu_summary_groups
    from .groups.ceval import ceval_summary_groups
    from .groups.bbh import bbh_summary_groups
    from .groups.GaokaoBench import GaokaoBench_summary_groups
    from .groups.flores import flores_summary_groups

# Report layout: `dataset_abbrs` fixes the row order of the summary table;
# the '--- Section ---' entries are visual separators, not datasets.
summarizer = dict(
    dataset_abbrs=[
        '--- Exam ---',
        'agieval',
        'mmlu-all-set',
        'ceval',
        'GaokaoBench',
        'bbh',
        '--- Coding ---',
        'openai_humaneval',
        'mbpp',
        '--- ChineseUniversal ---',
        'C3',
        'CMRC_dev',
        'DRCD_dev',
        'afqmc-dev',
        'cmnli',
        'ocnli',
        'bustm-dev',
        'chid-dev',
        'cluewsc-dev',
        'csl_dev',
        'eprstmt-dev',
        'ocnli_fc-dev',
        'tnews-dev',
        'lcsts',
        '--- Completion ---',
        'lambada',
        'story_cloze',
        '--- EnglishUniversal ---',
        'AX_b',
        'AX_g',
        'BoolQ',
        'CB',
        'COPA',
        'MultiRC',
        'RTE',
        'ReCoRD',
        'WiC',
        'WSC',
        'race-high',
        'race-middle',
        '--- NLG ---',
        'Xsum',
        '--- Reasoning ---',
        'gsm8k',
        'summedits',
        'math',
        'TheoremQA',
        '--- QA ---',
        'hellaswag',
        'ARC-e',
        'ARC-c',
        'commonsense_qa',
        'piqa',
        'siqa',
        'strategyqa',
        'winogrande',
        'openbookqa',
        'openbookqa_fact',
        'nq',
        'triviaqa',
        '--- Translation ---',
        'flores_100_Indo-European-Germanic_English',
        'flores_100_English_Indo-European-Germanic',
        'flores_100_Indo-European-Romance_English',
        'flores_100_English_Indo-European-Romance',
        'flores_100_zho_simpl-eng',
        'flores_100_eng-zho_simpl',
        '--- Security ---',
        'crows_pairs',
    ],
    # Flatten every `*_summary_groups` list imported above into one list by
    # scanning this module's namespace for names ending in the suffix.
    summary_groups=sum(
        [v for k, v in locals().items() if k.endswith('_summary_groups')],
        [],
    ),
    # Prompt-database bookkeeping used by the summarizer.
    prompt_db=dict(
        database_path='configs/datasets/log.json',
        config_dir='configs/datasets',
        blacklist='.promptignore',
    ),
)