From 9083dea6838b5843ec066f5323be59645a827514 Mon Sep 17 00:00:00 2001 From: Fengzhe Zhou Date: Mon, 27 Nov 2023 16:06:49 +0800 Subject: [PATCH] [Sync] some renaming (#641) --- configs/datasets/ceval/ceval_gen_2daf24.py | 185 +++++------------- configs/datasets/ceval/ceval_gen_5f30c7.py | 185 +++++------------- configs/datasets/ceval/ceval_ppl_578f8d.py | 185 +++++------------- configs/datasets/ceval/ceval_ppl_93e5ce.py | 185 +++++------------- .../ceval/ceval_zero_shot_gen_bd40ef.py | 105 ++++++++++ .../commonsenseqa/commonsenseqa_ppl_c49e77.py | 41 ++++ configs/datasets/ds1000/ds1000_gen_cbc84f.py | 4 +- .../ds1000/ds1000_service_eval_gen_cbc84f.py | 67 +++++++ .../hellaswag/hellaswag_ppl_7d7f2d.py | 33 ++++ .../mmlu/mmlu_zero_shot_gen_47e2c0.py | 123 ++++++++++++ configs/models/bluelm/hf_bluelm_7b_base.py | 24 +++ .../models/bluelm/hf_bluelm_7b_base_32k.py | 24 +++ configs/models/bluelm/hf_bluelm_7b_chat.py | 32 +++ .../models/bluelm/hf_bluelm_7b_chat_32k.py | 32 +++ .../models/nanbeige/hf_nanbeige_16b_base.py | 33 ++++ .../nanbeige/hf_nanbeige_16b_base_32k.py | 34 ++++ .../models/nanbeige/hf_nanbeige_16b_chat.py | 34 ++++ .../nanbeige/hf_nanbeige_16b_chat_32k.py | 34 ++++ .../models/others/hf_dolphin_21_mistral_7b.py | 33 ++++ .../models/others/hf_fashiongpt_70b_v11.py | 33 ++++ .../models/others/hf_orionstar_yi_34b_chat.py | 34 ++++ configs/summarizers/groups/ds1000.py | 5 + opencompass/datasets/ds1000.py | 104 ++++++++++ opencompass/datasets/humaneval.py | 1 + opencompass/datasets/mbpp.py | 15 +- opencompass/summarizers/default.py | 10 +- opencompass/utils/text_postprocessors.py | 54 ++++- tools/prompt_viewer.py | 17 +- 28 files changed, 1102 insertions(+), 564 deletions(-) create mode 100644 configs/datasets/ceval/ceval_zero_shot_gen_bd40ef.py create mode 100644 configs/datasets/commonsenseqa/commonsenseqa_ppl_c49e77.py create mode 100644 configs/datasets/ds1000/ds1000_service_eval_gen_cbc84f.py create mode 100644 configs/datasets/hellaswag/hellaswag_ppl_7d7f2d.py create mode 100644 configs/datasets/mmlu/mmlu_zero_shot_gen_47e2c0.py create mode 100644 configs/models/bluelm/hf_bluelm_7b_base.py create mode 100644 configs/models/bluelm/hf_bluelm_7b_base_32k.py create mode 100644 configs/models/bluelm/hf_bluelm_7b_chat.py create mode 100644 configs/models/bluelm/hf_bluelm_7b_chat_32k.py create mode 100644 configs/models/nanbeige/hf_nanbeige_16b_base.py create mode 100644 configs/models/nanbeige/hf_nanbeige_16b_base_32k.py create mode 100644 configs/models/nanbeige/hf_nanbeige_16b_chat.py create mode 100644 configs/models/nanbeige/hf_nanbeige_16b_chat_32k.py create mode 100644 configs/models/others/hf_dolphin_21_mistral_7b.py create mode 100644 configs/models/others/hf_fashiongpt_70b_v11.py create mode 100644 configs/models/others/hf_orionstar_yi_34b_chat.py create mode 100644 configs/summarizers/groups/ds1000.py diff --git a/configs/datasets/ceval/ceval_gen_2daf24.py b/configs/datasets/ceval/ceval_gen_2daf24.py index a2e020f1..f29a637f 100644 --- a/configs/datasets/ceval/ceval_gen_2daf24.py +++ b/configs/datasets/ceval/ceval_gen_2daf24.py @@ -6,139 +6,58 @@ from opencompass.datasets import CEvalDataset from opencompass.utils.text_postprocessors import first_capital_postprocess ceval_subject_mapping = { - "computer_network": - ["Computer Network", "\u8ba1\u7b97\u673a\u7f51\u7edc", "STEM"], - "operating_system": - ["Operating System", "\u64cd\u4f5c\u7cfb\u7edf", "STEM"], - "computer_architecture": - ["Computer Architecture", "\u8ba1\u7b97\u673a\u7ec4\u6210", "STEM"], - "college_programming": - ["College Programming", "\u5927\u5b66\u7f16\u7a0b", "STEM"], - "college_physics": ["College Physics", "\u5927\u5b66\u7269\u7406", "STEM"], - "college_chemistry": - ["College Chemistry", "\u5927\u5b66\u5316\u5b66", "STEM"], - "advanced_mathematics": - ["Advanced Mathematics", "\u9ad8\u7b49\u6570\u5b66", "STEM"], - "probability_and_statistics": - ["Probability and Statistics", "\u6982\u7387\u7edf\u8ba1", "STEM"], - "discrete_mathematics": - ["Discrete Mathematics", "\u79bb\u6563\u6570\u5b66", "STEM"], - "electrical_engineer": [ - "Electrical Engineer", "\u6ce8\u518c\u7535\u6c14\u5de5\u7a0b\u5e08", - "STEM" - ], - "metrology_engineer": - ["Metrology Engineer", "\u6ce8\u518c\u8ba1\u91cf\u5e08", "STEM"], - "high_school_mathematics": - ["High School Mathematics", "\u9ad8\u4e2d\u6570\u5b66", "STEM"], - "high_school_physics": - ["High School Physics", "\u9ad8\u4e2d\u7269\u7406", "STEM"], - "high_school_chemistry": - ["High School Chemistry", "\u9ad8\u4e2d\u5316\u5b66", "STEM"], - "high_school_biology": [ - "High School Biology", "\u9ad8\u4e2d\u751f\u7269", "STEM" - ], - "middle_school_mathematics": [ - "Middle School Mathematics", "\u521d\u4e2d\u6570\u5b66", "STEM" - ], - "middle_school_biology": [ - "Middle School Biology", "\u521d\u4e2d\u751f\u7269", "STEM" - ], - "middle_school_physics": [ - "Middle School Physics", "\u521d\u4e2d\u7269\u7406", "STEM" - ], - "middle_school_chemistry": [ - "Middle School Chemistry", "\u521d\u4e2d\u5316\u5b66", "STEM" - ], - "veterinary_medicine": [ - "Veterinary Medicine", "\u517d\u533b\u5b66", "STEM" - ], - "college_economics": [ - "College Economics", "\u5927\u5b66\u7ecf\u6d4e\u5b66", "Social Science" - ], - "business_administration": [ - "Business Administration", "\u5de5\u5546\u7ba1\u7406", "Social Science" - ], - "marxism": [ - "Marxism", "\u9a6c\u514b\u601d\u4e3b\u4e49\u57fa\u672c\u539f\u7406", - "Social Science" - ], - "mao_zedong_thought": [ - "Mao Zedong Thought", - "\u6bdb\u6cfd\u4e1c\u601d\u60f3\u548c\u4e2d\u56fd\u7279\u8272\u793e\u4f1a\u4e3b\u4e49\u7406\u8bba\u4f53\u7cfb\u6982\u8bba", - "Social Science" - ], - "education_science": [ - "Education Science", "\u6559\u80b2\u5b66", "Social Science" - ], - "teacher_qualification": [ - "Teacher Qualification", "\u6559\u5e08\u8d44\u683c", "Social Science" - ], - "high_school_politics": [ - "High School Politics", "\u9ad8\u4e2d\u653f\u6cbb", "Social Science" - ], - "high_school_geography": [ - "High School Geography", "\u9ad8\u4e2d\u5730\u7406", "Social Science" - ], - "middle_school_politics": [ - "Middle School Politics", "\u521d\u4e2d\u653f\u6cbb", "Social Science" - ], - "middle_school_geography": [ - "Middle School Geography", "\u521d\u4e2d\u5730\u7406", "Social Science" - ], - "modern_chinese_history": - ["Modern Chinese History", "\u8fd1\u4ee3\u53f2\u7eb2\u8981", "Humanities"], - "ideological_and_moral_cultivation": [ - "Ideological and Moral Cultivation", - "\u601d\u60f3\u9053\u5fb7\u4fee\u517b\u4e0e\u6cd5\u5f8b\u57fa\u7840", - "Humanities" - ], - "logic": ["Logic", "\u903b\u8f91\u5b66", "Humanities"], - "law": ["Law", "\u6cd5\u5b66", "Humanities"], - "chinese_language_and_literature": [ - "Chinese Language and Literature", - "\u4e2d\u56fd\u8bed\u8a00\u6587\u5b66", "Humanities" - ], - "art_studies": ["Art Studies", "\u827a\u672f\u5b66", "Humanities"], - "professional_tour_guide": [ - "Professional Tour Guide", "\u5bfc\u6e38\u8d44\u683c", "Humanities" - ], - "legal_professional": [ - "Legal Professional", "\u6cd5\u5f8b\u804c\u4e1a\u8d44\u683c", - "Humanities" - ], - "high_school_chinese": [ - "High School Chinese", "\u9ad8\u4e2d\u8bed\u6587", "Humanities" - ], - "high_school_history": [ - "High School History", "\u9ad8\u4e2d\u5386\u53f2", "Humanities" - ], - "middle_school_history": [ - "Middle School History", "\u521d\u4e2d\u5386\u53f2", "Humanities" - ], - "civil_servant": ["Civil Servant", "\u516c\u52a1\u5458", "Other"], - "sports_science": ["Sports Science", "\u4f53\u80b2\u5b66", "Other"], - "plant_protection": [ - "Plant Protection", "\u690d\u7269\u4fdd\u62a4", "Other" - ], - "basic_medicine": ["Basic Medicine", "\u57fa\u7840\u533b\u5b66", "Other"], - "clinical_medicine": [ - "Clinical Medicine", "\u4e34\u5e8a\u533b\u5b66", "Other" - ], - "urban_and_rural_planner": [ - "Urban and Rural Planner", - "\u6ce8\u518c\u57ce\u4e61\u89c4\u5212\u5e08", "Other" - ], - "accountant": ["Accountant", "\u6ce8\u518c\u4f1a\u8ba1\u5e08", "Other"], - "fire_engineer": [ - "Fire Engineer", "\u6ce8\u518c\u6d88\u9632\u5de5\u7a0b\u5e08", "Other" - ], - "environmental_impact_assessment_engineer": [ - "Environmental Impact Assessment Engineer", - "\u73af\u5883\u5f71\u54cd\u8bc4\u4ef7\u5de5\u7a0b\u5e08", "Other" - ], - "tax_accountant": ["Tax Accountant", "\u7a0e\u52a1\u5e08", "Other"], - "physician": ["Physician", "\u533b\u5e08\u8d44\u683c", "Other"] + 'computer_network': ['Computer Network', '计算机网络', 'STEM'], + 'operating_system': ['Operating System', '操作系统', 'STEM'], + 'computer_architecture': ['Computer Architecture', '计算机组成', 'STEM'], + 'college_programming': ['College Programming', '大学编程', 'STEM'], + 'college_physics': ['College Physics', '大学物理', 'STEM'], + 'college_chemistry': ['College Chemistry', '大学化学', 'STEM'], + 'advanced_mathematics': ['Advanced Mathematics', '高等数学', 'STEM'], + 'probability_and_statistics': ['Probability and Statistics', '概率统计', 'STEM'], + 'discrete_mathematics': ['Discrete Mathematics', '离散数学', 'STEM'], + 'electrical_engineer': ['Electrical Engineer', '注册电气工程师', 'STEM'], + 'metrology_engineer': ['Metrology Engineer', '注册计量师', 'STEM'], + 'high_school_mathematics': ['High School Mathematics', '高中数学', 'STEM'], + 'high_school_physics': ['High School Physics', '高中物理', 'STEM'], + 'high_school_chemistry': ['High School Chemistry', '高中化学', 'STEM'], + 'high_school_biology': ['High School Biology', '高中生物', 'STEM'], + 'middle_school_mathematics': ['Middle School Mathematics', '初中数学', 'STEM'], + 'middle_school_biology': ['Middle School Biology', '初中生物', 'STEM'], + 'middle_school_physics': ['Middle School Physics', '初中物理', 'STEM'], + 'middle_school_chemistry': ['Middle School Chemistry', '初中化学', 'STEM'], + 'veterinary_medicine': ['Veterinary Medicine', '兽医学', 'STEM'], + 'college_economics': ['College Economics', '大学经济学', 'Social Science'], + 'business_administration': ['Business Administration', '工商管理', 'Social Science'], + 'marxism': ['Marxism', '马克思主义基本原理', 'Social Science'], + 'mao_zedong_thought': ['Mao Zedong Thought', '毛泽东思想和中国特色社会主义理论体系概论', 'Social Science'], + 'education_science': ['Education Science', '教育学', 'Social Science'], + 'teacher_qualification': ['Teacher Qualification', '教师资格', 'Social Science'], + 'high_school_politics': ['High School Politics', '高中政治', 'Social Science'], + 'high_school_geography': ['High School Geography', '高中地理', 'Social Science'], + 'middle_school_politics': ['Middle School Politics', '初中政治', 'Social Science'], + 'middle_school_geography': ['Middle School Geography', '初中地理', 'Social Science'], + 'modern_chinese_history': ['Modern Chinese History', '近代史纲要', 'Humanities'], + 'ideological_and_moral_cultivation': ['Ideological and Moral Cultivation', '思想道德修养与法律基础', 'Humanities'], + 'logic': ['Logic', '逻辑学', 'Humanities'], + 'law': ['Law', '法学', 'Humanities'], + 'chinese_language_and_literature': ['Chinese Language and Literature', '中国语言文学', 'Humanities'], + 'art_studies': ['Art Studies', '艺术学', 'Humanities'], + 'professional_tour_guide': ['Professional Tour Guide', '导游资格', 'Humanities'], + 'legal_professional': ['Legal Professional', '法律职业资格', 'Humanities'], + 'high_school_chinese': ['High School Chinese', '高中语文', 'Humanities'], + 'high_school_history': ['High School History', '高中历史', 'Humanities'], + 'middle_school_history': ['Middle School History', '初中历史', 'Humanities'], + 'civil_servant': ['Civil Servant', '公务员', 'Other'], + 'sports_science': ['Sports Science', '体育学', 'Other'], + 'plant_protection': ['Plant Protection', '植物保护', 'Other'], + 'basic_medicine': ['Basic Medicine', '基础医学', 'Other'], + 'clinical_medicine': ['Clinical Medicine', '临床医学', 'Other'], + 'urban_and_rural_planner': ['Urban and Rural Planner', '注册城乡规划师', 'Other'], + 'accountant': ['Accountant', '注册会计师', 'Other'], + 'fire_engineer': ['Fire Engineer', '注册消防工程师', 'Other'], + 'environmental_impact_assessment_engineer': ['Environmental Impact Assessment Engineer', '环境影响评价工程师', 'Other'], + 'tax_accountant': ['Tax Accountant', '税务师', 'Other'], + 'physician': ['Physician', '医师资格', 'Other'], } ceval_all_sets = list(ceval_subject_mapping.keys()) diff --git a/configs/datasets/ceval/ceval_gen_5f30c7.py b/configs/datasets/ceval/ceval_gen_5f30c7.py index caca0028..606e503f 100644 --- a/configs/datasets/ceval/ceval_gen_5f30c7.py +++ b/configs/datasets/ceval/ceval_gen_5f30c7.py @@ -6,139 +6,58 @@ from opencompass.datasets import CEvalDataset from opencompass.utils.text_postprocessors import first_capital_postprocess ceval_subject_mapping = { - "computer_network": - ["Computer Network", "\u8ba1\u7b97\u673a\u7f51\u7edc", "STEM"], - "operating_system": - ["Operating System", "\u64cd\u4f5c\u7cfb\u7edf", "STEM"], - "computer_architecture": - ["Computer Architecture", "\u8ba1\u7b97\u673a\u7ec4\u6210", "STEM"], - "college_programming": - ["College Programming", "\u5927\u5b66\u7f16\u7a0b", "STEM"], - "college_physics": ["College Physics", "\u5927\u5b66\u7269\u7406", "STEM"], - "college_chemistry": - ["College Chemistry", "\u5927\u5b66\u5316\u5b66", "STEM"], - "advanced_mathematics": - ["Advanced Mathematics", "\u9ad8\u7b49\u6570\u5b66", "STEM"], - "probability_and_statistics": - ["Probability and Statistics", "\u6982\u7387\u7edf\u8ba1", "STEM"], - "discrete_mathematics": - ["Discrete Mathematics", "\u79bb\u6563\u6570\u5b66", "STEM"], - "electrical_engineer": [ - "Electrical Engineer", "\u6ce8\u518c\u7535\u6c14\u5de5\u7a0b\u5e08", - "STEM" - ], - "metrology_engineer": - ["Metrology Engineer", "\u6ce8\u518c\u8ba1\u91cf\u5e08", "STEM"], - "high_school_mathematics": - ["High School Mathematics", "\u9ad8\u4e2d\u6570\u5b66", "STEM"], - "high_school_physics": - ["High School Physics", "\u9ad8\u4e2d\u7269\u7406", "STEM"], - "high_school_chemistry": - ["High School Chemistry", "\u9ad8\u4e2d\u5316\u5b66", "STEM"], - "high_school_biology": [ - "High School Biology", "\u9ad8\u4e2d\u751f\u7269", "STEM" - ], - "middle_school_mathematics": [ - "Middle School Mathematics", "\u521d\u4e2d\u6570\u5b66", "STEM" - ], - "middle_school_biology": [ - "Middle School Biology", "\u521d\u4e2d\u751f\u7269", "STEM" - ], - "middle_school_physics": [ - "Middle School Physics", "\u521d\u4e2d\u7269\u7406", "STEM" - ], - "middle_school_chemistry": [ - "Middle School Chemistry", "\u521d\u4e2d\u5316\u5b66", "STEM" - ], - "veterinary_medicine": [ - "Veterinary Medicine", "\u517d\u533b\u5b66", "STEM" - ], - "college_economics": [ - "College Economics", "\u5927\u5b66\u7ecf\u6d4e\u5b66", "Social Science" - ], - "business_administration": [ - "Business Administration", "\u5de5\u5546\u7ba1\u7406", "Social Science" - ], - "marxism": [ - "Marxism", "\u9a6c\u514b\u601d\u4e3b\u4e49\u57fa\u672c\u539f\u7406", - "Social Science" - ], - "mao_zedong_thought": [ - "Mao Zedong Thought", - "\u6bdb\u6cfd\u4e1c\u601d\u60f3\u548c\u4e2d\u56fd\u7279\u8272\u793e\u4f1a\u4e3b\u4e49\u7406\u8bba\u4f53\u7cfb\u6982\u8bba", - "Social Science" - ], - "education_science": [ - "Education Science", "\u6559\u80b2\u5b66", "Social Science" - ], - "teacher_qualification": [ - "Teacher Qualification", "\u6559\u5e08\u8d44\u683c", "Social Science" - ], - "high_school_politics": [ - "High School Politics", "\u9ad8\u4e2d\u653f\u6cbb", "Social Science" - ], - "high_school_geography": [ - "High School Geography", "\u9ad8\u4e2d\u5730\u7406", "Social Science" - ], - "middle_school_politics": [ - "Middle School Politics", "\u521d\u4e2d\u653f\u6cbb", "Social Science" - ], - "middle_school_geography": [ - "Middle School Geography", "\u521d\u4e2d\u5730\u7406", "Social Science" - ], - "modern_chinese_history": - ["Modern Chinese History", "\u8fd1\u4ee3\u53f2\u7eb2\u8981", "Humanities"], - "ideological_and_moral_cultivation": [ - "Ideological and Moral Cultivation", - "\u601d\u60f3\u9053\u5fb7\u4fee\u517b\u4e0e\u6cd5\u5f8b\u57fa\u7840", - "Humanities" - ], - "logic": ["Logic", "\u903b\u8f91\u5b66", "Humanities"], - "law": ["Law", "\u6cd5\u5b66", "Humanities"], - "chinese_language_and_literature": [ - "Chinese Language and Literature", - "\u4e2d\u56fd\u8bed\u8a00\u6587\u5b66", "Humanities" - ], - "art_studies": ["Art Studies", "\u827a\u672f\u5b66", "Humanities"], - "professional_tour_guide": [ - "Professional Tour Guide", "\u5bfc\u6e38\u8d44\u683c", "Humanities" - ], - "legal_professional": [ - "Legal Professional", "\u6cd5\u5f8b\u804c\u4e1a\u8d44\u683c", - "Humanities" - ], - "high_school_chinese": [ - "High School Chinese", "\u9ad8\u4e2d\u8bed\u6587", "Humanities" - ], - "high_school_history": [ - "High School History", "\u9ad8\u4e2d\u5386\u53f2", "Humanities" - ], - "middle_school_history": [ - "Middle School History", "\u521d\u4e2d\u5386\u53f2", "Humanities" - ], - "civil_servant": ["Civil Servant", "\u516c\u52a1\u5458", "Other"], - "sports_science": ["Sports Science", "\u4f53\u80b2\u5b66", "Other"], - "plant_protection": [ - "Plant Protection", "\u690d\u7269\u4fdd\u62a4", "Other" - ], - "basic_medicine": ["Basic Medicine", "\u57fa\u7840\u533b\u5b66", "Other"], - "clinical_medicine": [ - "Clinical Medicine", "\u4e34\u5e8a\u533b\u5b66", "Other" - ], - "urban_and_rural_planner": [ - "Urban and Rural Planner", - "\u6ce8\u518c\u57ce\u4e61\u89c4\u5212\u5e08", "Other" - ], - "accountant": ["Accountant", "\u6ce8\u518c\u4f1a\u8ba1\u5e08", "Other"], - "fire_engineer": [ - "Fire Engineer", "\u6ce8\u518c\u6d88\u9632\u5de5\u7a0b\u5e08", "Other" - ], - "environmental_impact_assessment_engineer": [ - "Environmental Impact Assessment Engineer", - "\u73af\u5883\u5f71\u54cd\u8bc4\u4ef7\u5de5\u7a0b\u5e08", "Other" - ], - "tax_accountant": ["Tax Accountant", "\u7a0e\u52a1\u5e08", "Other"], - "physician": ["Physician", "\u533b\u5e08\u8d44\u683c", "Other"] + 'computer_network': ['Computer Network', '计算机网络', 'STEM'], + 'operating_system': ['Operating System', '操作系统', 'STEM'], + 'computer_architecture': ['Computer Architecture', '计算机组成', 'STEM'], + 'college_programming': ['College Programming', '大学编程', 'STEM'], + 'college_physics': ['College Physics', '大学物理', 'STEM'], + 'college_chemistry': ['College Chemistry', '大学化学', 'STEM'], + 'advanced_mathematics': ['Advanced Mathematics', '高等数学', 'STEM'], + 'probability_and_statistics': ['Probability and Statistics', '概率统计', 'STEM'], + 'discrete_mathematics': ['Discrete Mathematics', '离散数学', 'STEM'], + 'electrical_engineer': ['Electrical Engineer', '注册电气工程师', 'STEM'], + 'metrology_engineer': ['Metrology Engineer', '注册计量师', 'STEM'], + 'high_school_mathematics': ['High School Mathematics', '高中数学', 'STEM'], + 'high_school_physics': ['High School Physics', '高中物理', 'STEM'], + 'high_school_chemistry': ['High School Chemistry', '高中化学', 'STEM'], + 'high_school_biology': ['High School Biology', '高中生物', 'STEM'], + 'middle_school_mathematics': ['Middle School Mathematics', '初中数学', 'STEM'], + 'middle_school_biology': ['Middle School Biology', '初中生物', 'STEM'], + 'middle_school_physics': ['Middle School Physics', '初中物理', 'STEM'], + 'middle_school_chemistry': ['Middle School Chemistry', '初中化学', 'STEM'], + 'veterinary_medicine': ['Veterinary Medicine', '兽医学', 'STEM'], + 'college_economics': ['College Economics', '大学经济学', 'Social Science'], + 'business_administration': ['Business Administration', '工商管理', 'Social Science'], + 'marxism': ['Marxism', '马克思主义基本原理', 'Social Science'], + 'mao_zedong_thought': ['Mao Zedong Thought', '毛泽东思想和中国特色社会主义理论体系概论', 'Social Science'], + 'education_science': ['Education Science', '教育学', 'Social Science'], + 'teacher_qualification': ['Teacher Qualification', '教师资格', 'Social Science'], + 'high_school_politics': ['High School Politics', '高中政治', 'Social Science'], + 'high_school_geography': ['High School Geography', '高中地理', 'Social Science'], + 'middle_school_politics': ['Middle School Politics', '初中政治', 'Social Science'], + 'middle_school_geography': ['Middle School Geography', '初中地理', 'Social Science'], + 'modern_chinese_history': ['Modern Chinese History', '近代史纲要', 'Humanities'], + 'ideological_and_moral_cultivation': ['Ideological and Moral Cultivation', '思想道德修养与法律基础', 'Humanities'], + 'logic': ['Logic', '逻辑学', 'Humanities'], + 'law': ['Law', '法学', 'Humanities'], + 'chinese_language_and_literature': ['Chinese Language and Literature', '中国语言文学', 'Humanities'], + 'art_studies': ['Art Studies', '艺术学', 'Humanities'], + 'professional_tour_guide': ['Professional Tour Guide', '导游资格', 'Humanities'], + 'legal_professional': ['Legal Professional', '法律职业资格', 'Humanities'], + 'high_school_chinese': ['High School Chinese', '高中语文', 'Humanities'], + 'high_school_history': ['High School History', '高中历史', 'Humanities'], + 'middle_school_history': ['Middle School History', '初中历史', 'Humanities'], + 'civil_servant': ['Civil Servant', '公务员', 'Other'], + 'sports_science': ['Sports Science', '体育学', 'Other'], + 'plant_protection': ['Plant Protection', '植物保护', 'Other'], + 'basic_medicine': ['Basic Medicine', '基础医学', 'Other'], + 'clinical_medicine': ['Clinical Medicine', '临床医学', 'Other'], + 'urban_and_rural_planner': ['Urban and Rural Planner', '注册城乡规划师', 'Other'], + 'accountant': ['Accountant', '注册会计师', 'Other'], + 'fire_engineer': ['Fire Engineer', '注册消防工程师', 'Other'], + 'environmental_impact_assessment_engineer': ['Environmental Impact Assessment Engineer', '环境影响评价工程师', 'Other'], + 'tax_accountant': ['Tax Accountant', '税务师', 'Other'], + 'physician': ['Physician', '医师资格', 'Other'], } ceval_all_sets = list(ceval_subject_mapping.keys()) diff --git a/configs/datasets/ceval/ceval_ppl_578f8d.py b/configs/datasets/ceval/ceval_ppl_578f8d.py index 8447c86b..53048709 100644 --- a/configs/datasets/ceval/ceval_ppl_578f8d.py +++ b/configs/datasets/ceval/ceval_ppl_578f8d.py @@ -5,139 +5,58 @@ from opencompass.openicl.icl_evaluator import AccEvaluator from opencompass.datasets import CEvalDataset ceval_subject_mapping = { - "computer_network": - ["Computer Network", "\u8ba1\u7b97\u673a\u7f51\u7edc", "STEM"], - "operating_system": - ["Operating System", "\u64cd\u4f5c\u7cfb\u7edf", "STEM"], - "computer_architecture": - ["Computer Architecture", "\u8ba1\u7b97\u673a\u7ec4\u6210", "STEM"], - "college_programming": - ["College Programming", "\u5927\u5b66\u7f16\u7a0b", "STEM"], - "college_physics": ["College Physics", "\u5927\u5b66\u7269\u7406", "STEM"], - "college_chemistry": - ["College Chemistry", "\u5927\u5b66\u5316\u5b66", "STEM"], - "advanced_mathematics": - ["Advanced Mathematics", "\u9ad8\u7b49\u6570\u5b66", "STEM"], - "probability_and_statistics": - ["Probability and Statistics", "\u6982\u7387\u7edf\u8ba1", "STEM"], - "discrete_mathematics": - ["Discrete Mathematics", "\u79bb\u6563\u6570\u5b66", "STEM"], - "electrical_engineer": [ - "Electrical Engineer", "\u6ce8\u518c\u7535\u6c14\u5de5\u7a0b\u5e08", - "STEM" - ], - "metrology_engineer": - ["Metrology Engineer", "\u6ce8\u518c\u8ba1\u91cf\u5e08", "STEM"], - "high_school_mathematics": - ["High School Mathematics", "\u9ad8\u4e2d\u6570\u5b66", "STEM"], - "high_school_physics": - ["High School Physics", "\u9ad8\u4e2d\u7269\u7406", "STEM"], - "high_school_chemistry": - ["High School Chemistry", "\u9ad8\u4e2d\u5316\u5b66", "STEM"], - "high_school_biology": [ - "High School Biology", "\u9ad8\u4e2d\u751f\u7269", "STEM" - ], - "middle_school_mathematics": [ - "Middle School Mathematics", "\u521d\u4e2d\u6570\u5b66", "STEM" - ], - "middle_school_biology": [ - "Middle School Biology", "\u521d\u4e2d\u751f\u7269", "STEM" - ], - "middle_school_physics": [ - "Middle School Physics", "\u521d\u4e2d\u7269\u7406", "STEM" - ], - "middle_school_chemistry": [ - "Middle School Chemistry", "\u521d\u4e2d\u5316\u5b66", "STEM" - ], - "veterinary_medicine": [ - "Veterinary Medicine", "\u517d\u533b\u5b66", "STEM" - ], - "college_economics": [ - "College Economics", "\u5927\u5b66\u7ecf\u6d4e\u5b66", "Social Science" - ], - "business_administration": [ - "Business Administration", "\u5de5\u5546\u7ba1\u7406", "Social Science" - ], - "marxism": [ - "Marxism", "\u9a6c\u514b\u601d\u4e3b\u4e49\u57fa\u672c\u539f\u7406", - "Social Science" - ], - "mao_zedong_thought": [ - "Mao Zedong Thought", - "\u6bdb\u6cfd\u4e1c\u601d\u60f3\u548c\u4e2d\u56fd\u7279\u8272\u793e\u4f1a\u4e3b\u4e49\u7406\u8bba\u4f53\u7cfb\u6982\u8bba", - "Social Science" - ], - "education_science": [ - "Education Science", "\u6559\u80b2\u5b66", "Social Science" - ], - "teacher_qualification": [ - "Teacher Qualification", "\u6559\u5e08\u8d44\u683c", "Social Science" - ], - "high_school_politics": [ - "High School Politics", "\u9ad8\u4e2d\u653f\u6cbb", "Social Science" - ], - "high_school_geography": [ - "High School Geography", "\u9ad8\u4e2d\u5730\u7406", "Social Science" - ], - "middle_school_politics": [ - "Middle School Politics", "\u521d\u4e2d\u653f\u6cbb", "Social Science" - ], - "middle_school_geography": [ - "Middle School Geography", "\u521d\u4e2d\u5730\u7406", "Social Science" - ], - "modern_chinese_history": - ["Modern Chinese History", "\u8fd1\u4ee3\u53f2\u7eb2\u8981", "Humanities"], - "ideological_and_moral_cultivation": [ - "Ideological and Moral Cultivation", - "\u601d\u60f3\u9053\u5fb7\u4fee\u517b\u4e0e\u6cd5\u5f8b\u57fa\u7840", - "Humanities" - ], - "logic": ["Logic", "\u903b\u8f91\u5b66", "Humanities"], - "law": ["Law", "\u6cd5\u5b66", "Humanities"], - "chinese_language_and_literature": [ - "Chinese Language and Literature", - "\u4e2d\u56fd\u8bed\u8a00\u6587\u5b66", "Humanities" - ], - "art_studies": ["Art Studies", "\u827a\u672f\u5b66", "Humanities"], - "professional_tour_guide": [ - "Professional Tour Guide", "\u5bfc\u6e38\u8d44\u683c", "Humanities" - ], - "legal_professional": [ - "Legal Professional", "\u6cd5\u5f8b\u804c\u4e1a\u8d44\u683c", - "Humanities" - ], - "high_school_chinese": [ - "High School Chinese", "\u9ad8\u4e2d\u8bed\u6587", "Humanities" - ], - "high_school_history": [ - "High School History", "\u9ad8\u4e2d\u5386\u53f2", "Humanities" - ], - "middle_school_history": [ - "Middle School History", "\u521d\u4e2d\u5386\u53f2", "Humanities" - ], - "civil_servant": ["Civil Servant", "\u516c\u52a1\u5458", "Other"], - "sports_science": ["Sports Science", "\u4f53\u80b2\u5b66", "Other"], - "plant_protection": [ - "Plant Protection", "\u690d\u7269\u4fdd\u62a4", "Other" - ], - "basic_medicine": ["Basic Medicine", "\u57fa\u7840\u533b\u5b66", "Other"], - "clinical_medicine": [ - "Clinical Medicine", "\u4e34\u5e8a\u533b\u5b66", "Other" - ], - "urban_and_rural_planner": [ - "Urban and Rural Planner", - "\u6ce8\u518c\u57ce\u4e61\u89c4\u5212\u5e08", "Other" - ], - "accountant": ["Accountant", "\u6ce8\u518c\u4f1a\u8ba1\u5e08", "Other"], - "fire_engineer": [ - "Fire Engineer", "\u6ce8\u518c\u6d88\u9632\u5de5\u7a0b\u5e08", "Other" - ], - "environmental_impact_assessment_engineer": [ - "Environmental Impact Assessment Engineer", - "\u73af\u5883\u5f71\u54cd\u8bc4\u4ef7\u5de5\u7a0b\u5e08", "Other" - ], - "tax_accountant": ["Tax Accountant", "\u7a0e\u52a1\u5e08", "Other"], - "physician": ["Physician", "\u533b\u5e08\u8d44\u683c", "Other"] + 'computer_network': ['Computer Network', '计算机网络', 'STEM'], + 'operating_system': ['Operating System', '操作系统', 'STEM'], + 'computer_architecture': ['Computer Architecture', '计算机组成', 'STEM'], + 'college_programming': ['College Programming', '大学编程', 'STEM'], + 'college_physics': ['College Physics', '大学物理', 'STEM'], + 'college_chemistry': ['College Chemistry', '大学化学', 'STEM'], + 'advanced_mathematics': ['Advanced Mathematics', '高等数学', 'STEM'], + 'probability_and_statistics': ['Probability and Statistics', '概率统计', 'STEM'], + 'discrete_mathematics': ['Discrete Mathematics', '离散数学', 'STEM'], + 'electrical_engineer': ['Electrical Engineer', '注册电气工程师', 'STEM'], + 'metrology_engineer': ['Metrology Engineer', '注册计量师', 'STEM'], + 'high_school_mathematics': ['High School Mathematics', '高中数学', 'STEM'], + 'high_school_physics': ['High School Physics', '高中物理', 'STEM'], + 'high_school_chemistry': ['High School Chemistry', '高中化学', 'STEM'], + 'high_school_biology': ['High School Biology', '高中生物', 'STEM'], + 'middle_school_mathematics': ['Middle School Mathematics', '初中数学', 'STEM'], + 'middle_school_biology': ['Middle School Biology', '初中生物', 'STEM'], + 'middle_school_physics': ['Middle School Physics', '初中物理', 'STEM'], + 'middle_school_chemistry': ['Middle School Chemistry', '初中化学', 'STEM'], + 'veterinary_medicine': ['Veterinary Medicine', '兽医学', 'STEM'], + 'college_economics': ['College Economics', '大学经济学', 'Social Science'], + 'business_administration': ['Business Administration', '工商管理', 'Social Science'], + 'marxism': ['Marxism', '马克思主义基本原理', 'Social Science'], + 'mao_zedong_thought': ['Mao Zedong Thought', '毛泽东思想和中国特色社会主义理论体系概论', 'Social Science'], + 'education_science': ['Education Science', '教育学', 'Social Science'], + 'teacher_qualification': ['Teacher Qualification', '教师资格', 'Social Science'], + 'high_school_politics': ['High School Politics', '高中政治', 'Social Science'], + 'high_school_geography': ['High School Geography', '高中地理', 'Social Science'], + 'middle_school_politics': ['Middle School Politics', '初中政治', 'Social Science'], + 'middle_school_geography': ['Middle School Geography', '初中地理', 'Social Science'], + 'modern_chinese_history': ['Modern Chinese History', '近代史纲要', 'Humanities'], + 'ideological_and_moral_cultivation': ['Ideological and Moral Cultivation', '思想道德修养与法律基础', 'Humanities'], + 'logic': ['Logic', '逻辑学', 'Humanities'], + 'law': ['Law', '法学', 'Humanities'], + 'chinese_language_and_literature': ['Chinese Language and Literature', '中国语言文学', 'Humanities'], + 'art_studies': ['Art Studies', '艺术学', 'Humanities'], + 'professional_tour_guide': ['Professional Tour Guide', '导游资格', 'Humanities'], + 'legal_professional': ['Legal Professional', '法律职业资格', 'Humanities'], + 'high_school_chinese': ['High School Chinese', '高中语文', 'Humanities'], + 'high_school_history': ['High School History', '高中历史', 'Humanities'], + 'middle_school_history': ['Middle School History', '初中历史', 'Humanities'], + 'civil_servant': ['Civil Servant', '公务员', 'Other'], + 'sports_science': ['Sports Science', '体育学', 'Other'], + 'plant_protection': ['Plant Protection', '植物保护', 'Other'], + 'basic_medicine': ['Basic Medicine', '基础医学', 'Other'], + 'clinical_medicine': ['Clinical Medicine', '临床医学', 'Other'], + 'urban_and_rural_planner': ['Urban and Rural Planner', '注册城乡规划师', 'Other'], + 'accountant': ['Accountant', '注册会计师', 'Other'], + 'fire_engineer': ['Fire Engineer', '注册消防工程师', 'Other'], + 'environmental_impact_assessment_engineer': ['Environmental Impact Assessment Engineer', '环境影响评价工程师', 'Other'], + 'tax_accountant': ['Tax Accountant', '税务师', 'Other'], + 'physician': ['Physician', '医师资格', 'Other'], } ceval_all_sets = list(ceval_subject_mapping.keys()) diff --git a/configs/datasets/ceval/ceval_ppl_93e5ce.py b/configs/datasets/ceval/ceval_ppl_93e5ce.py index 9deea61a..0df5d402 100644 --- a/configs/datasets/ceval/ceval_ppl_93e5ce.py +++ b/configs/datasets/ceval/ceval_ppl_93e5ce.py @@ -5,139 +5,58 @@ from opencompass.openicl.icl_evaluator import AccEvaluator from opencompass.datasets import CEvalDataset ceval_subject_mapping = { - "computer_network": - ["Computer Network", "\u8ba1\u7b97\u673a\u7f51\u7edc", "STEM"], - "operating_system": - ["Operating System", "\u64cd\u4f5c\u7cfb\u7edf", "STEM"], - "computer_architecture": - ["Computer Architecture", "\u8ba1\u7b97\u673a\u7ec4\u6210", "STEM"], - "college_programming": - ["College Programming", "\u5927\u5b66\u7f16\u7a0b", "STEM"], - "college_physics": ["College Physics", "\u5927\u5b66\u7269\u7406", "STEM"], - "college_chemistry": - ["College Chemistry", "\u5927\u5b66\u5316\u5b66", "STEM"], - "advanced_mathematics": - ["Advanced Mathematics", "\u9ad8\u7b49\u6570\u5b66", "STEM"], - "probability_and_statistics": - ["Probability and Statistics", "\u6982\u7387\u7edf\u8ba1", "STEM"], - "discrete_mathematics": - ["Discrete Mathematics", "\u79bb\u6563\u6570\u5b66", "STEM"], - "electrical_engineer": [ - "Electrical Engineer", "\u6ce8\u518c\u7535\u6c14\u5de5\u7a0b\u5e08", - "STEM" - ], - "metrology_engineer": - ["Metrology Engineer", "\u6ce8\u518c\u8ba1\u91cf\u5e08", "STEM"], - "high_school_mathematics": - ["High School Mathematics", "\u9ad8\u4e2d\u6570\u5b66", "STEM"], - "high_school_physics": - ["High School Physics", "\u9ad8\u4e2d\u7269\u7406", "STEM"], - "high_school_chemistry": - ["High School Chemistry", "\u9ad8\u4e2d\u5316\u5b66", "STEM"], - "high_school_biology": [ - "High School Biology", "\u9ad8\u4e2d\u751f\u7269", "STEM" - ], - "middle_school_mathematics": [ - "Middle School Mathematics", "\u521d\u4e2d\u6570\u5b66", "STEM" - ], - "middle_school_biology": [ - "Middle School Biology", "\u521d\u4e2d\u751f\u7269", "STEM" - ], - "middle_school_physics": [ - "Middle School Physics", "\u521d\u4e2d\u7269\u7406", "STEM" - ], - "middle_school_chemistry": [ - "Middle School Chemistry", "\u521d\u4e2d\u5316\u5b66", "STEM" - ], - "veterinary_medicine": [ - "Veterinary Medicine", "\u517d\u533b\u5b66", "STEM" - ], - "college_economics": [ - "College Economics", "\u5927\u5b66\u7ecf\u6d4e\u5b66", "Social Science" - ], - "business_administration": [ - "Business Administration", "\u5de5\u5546\u7ba1\u7406", "Social Science" - ], - "marxism": [ - "Marxism", "\u9a6c\u514b\u601d\u4e3b\u4e49\u57fa\u672c\u539f\u7406", - "Social Science" - ], - "mao_zedong_thought": [ - "Mao Zedong Thought", - "\u6bdb\u6cfd\u4e1c\u601d\u60f3\u548c\u4e2d\u56fd\u7279\u8272\u793e\u4f1a\u4e3b\u4e49\u7406\u8bba\u4f53\u7cfb\u6982\u8bba", - "Social Science" - ], - "education_science": [ - "Education Science", "\u6559\u80b2\u5b66", "Social Science" - ], - "teacher_qualification": [ - "Teacher Qualification", "\u6559\u5e08\u8d44\u683c", "Social Science" - ], - "high_school_politics": [ - "High School Politics", "\u9ad8\u4e2d\u653f\u6cbb", "Social Science" - ], - "high_school_geography": [ - "High School Geography", "\u9ad8\u4e2d\u5730\u7406", "Social Science" - ], - "middle_school_politics": [ - "Middle School Politics", "\u521d\u4e2d\u653f\u6cbb", "Social Science" - ], - "middle_school_geography": [ - "Middle School Geography", "\u521d\u4e2d\u5730\u7406", "Social Science" - ], - "modern_chinese_history": - ["Modern Chinese History", "\u8fd1\u4ee3\u53f2\u7eb2\u8981", "Humanities"], - "ideological_and_moral_cultivation": [ - "Ideological and Moral Cultivation", - "\u601d\u60f3\u9053\u5fb7\u4fee\u517b\u4e0e\u6cd5\u5f8b\u57fa\u7840", - "Humanities" - ], - "logic": ["Logic", "\u903b\u8f91\u5b66", "Humanities"], - "law": ["Law", "\u6cd5\u5b66", "Humanities"], - "chinese_language_and_literature": [ - "Chinese Language and Literature", - "\u4e2d\u56fd\u8bed\u8a00\u6587\u5b66", "Humanities" - ], - "art_studies": ["Art Studies", "\u827a\u672f\u5b66", "Humanities"], - "professional_tour_guide": [ - "Professional Tour Guide", "\u5bfc\u6e38\u8d44\u683c", "Humanities" - ], - "legal_professional": [ - "Legal Professional", "\u6cd5\u5f8b\u804c\u4e1a\u8d44\u683c", - "Humanities" - ], - "high_school_chinese": [ - "High School Chinese", "\u9ad8\u4e2d\u8bed\u6587", "Humanities" - ], - "high_school_history": [ - "High School History", "\u9ad8\u4e2d\u5386\u53f2", "Humanities" - ], - "middle_school_history": [ - "Middle School History", "\u521d\u4e2d\u5386\u53f2", "Humanities" - ], - "civil_servant": ["Civil Servant", "\u516c\u52a1\u5458", "Other"], - "sports_science": ["Sports Science", "\u4f53\u80b2\u5b66", "Other"], - "plant_protection": [ - "Plant Protection", "\u690d\u7269\u4fdd\u62a4", "Other" - ], - "basic_medicine": ["Basic Medicine", "\u57fa\u7840\u533b\u5b66", "Other"], - "clinical_medicine": [ - "Clinical Medicine", "\u4e34\u5e8a\u533b\u5b66", "Other" - ], - "urban_and_rural_planner": [ - "Urban and Rural Planner", - "\u6ce8\u518c\u57ce\u4e61\u89c4\u5212\u5e08", "Other" - ], - "accountant": ["Accountant", "\u6ce8\u518c\u4f1a\u8ba1\u5e08", "Other"], - "fire_engineer": [ - "Fire Engineer", "\u6ce8\u518c\u6d88\u9632\u5de5\u7a0b\u5e08", "Other" - ], - "environmental_impact_assessment_engineer": [ - "Environmental Impact Assessment Engineer", - "\u73af\u5883\u5f71\u54cd\u8bc4\u4ef7\u5de5\u7a0b\u5e08", "Other" - ], - "tax_accountant": ["Tax Accountant", "\u7a0e\u52a1\u5e08", "Other"], - "physician": ["Physician", "\u533b\u5e08\u8d44\u683c", "Other"] + 'computer_network': ['Computer Network', '计算机网络', 'STEM'], + 'operating_system': ['Operating System', '操作系统', 'STEM'], + 'computer_architecture': ['Computer Architecture', '计算机组成', 'STEM'], + 'college_programming': ['College Programming', '大学编程', 'STEM'], + 'college_physics': ['College Physics', '大学物理', 'STEM'], + 'college_chemistry': ['College Chemistry', '大学化学', 'STEM'], + 'advanced_mathematics': ['Advanced Mathematics', '高等数学', 'STEM'], + 'probability_and_statistics': ['Probability and Statistics', '概率统计', 'STEM'], + 'discrete_mathematics': ['Discrete Mathematics', '离散数学', 'STEM'], + 'electrical_engineer': ['Electrical Engineer', '注册电气工程师', 'STEM'], + 'metrology_engineer': ['Metrology Engineer', '注册计量师', 'STEM'], + 'high_school_mathematics': ['High School Mathematics', '高中数学', 'STEM'], + 'high_school_physics': ['High School Physics', '高中物理', 'STEM'], + 'high_school_chemistry': ['High School Chemistry', '高中化学', 'STEM'], + 'high_school_biology': ['High School Biology', '高中生物', 'STEM'], + 'middle_school_mathematics': ['Middle School Mathematics', '初中数学', 'STEM'], + 'middle_school_biology': ['Middle School Biology', '初中生物', 'STEM'], + 'middle_school_physics': ['Middle School Physics', '初中物理', 'STEM'], + 'middle_school_chemistry': ['Middle School Chemistry', '初中化学', 'STEM'], + 'veterinary_medicine': ['Veterinary Medicine', '兽医学', 'STEM'], + 'college_economics': ['College Economics', '大学经济学', 'Social Science'], + 'business_administration': ['Business Administration', '工商管理', 'Social Science'], + 'marxism': ['Marxism', '马克思主义基本原理', 'Social Science'], + 'mao_zedong_thought': ['Mao Zedong Thought', '毛泽东思想和中国特色社会主义理论体系概论', 'Social Science'], + 'education_science': ['Education Science', '教育学', 'Social Science'], + 'teacher_qualification': ['Teacher Qualification', '教师资格', 'Social Science'], + 'high_school_politics': ['High School Politics', '高中政治', 'Social Science'], + 'high_school_geography': ['High School Geography', '高中地理', 'Social Science'], + 'middle_school_politics': ['Middle School Politics', '初中政治', 'Social Science'], + 'middle_school_geography': ['Middle School Geography', '初中地理', 'Social Science'], + 'modern_chinese_history': ['Modern Chinese History', '近代史纲要', 'Humanities'], + 'ideological_and_moral_cultivation': ['Ideological and Moral Cultivation', '思想道德修养与法律基础', 'Humanities'], + 'logic': ['Logic', '逻辑学', 'Humanities'], + 'law': ['Law', '法学', 'Humanities'], + 'chinese_language_and_literature': ['Chinese Language and Literature', '中国语言文学', 'Humanities'], + 'art_studies': ['Art Studies', '艺术学', 'Humanities'], + 'professional_tour_guide': ['Professional Tour Guide', '导游资格', 'Humanities'], + 'legal_professional': ['Legal Professional', '法律职业资格', 'Humanities'], + 'high_school_chinese': ['High School Chinese', '高中语文', 'Humanities'], + 'high_school_history': ['High School History', '高中历史', 'Humanities'], + 'middle_school_history': ['Middle School History', '初中历史', 'Humanities'], + 'civil_servant': ['Civil Servant', '公务员', 'Other'], + 'sports_science': ['Sports Science', '体育学', 'Other'], + 'plant_protection': ['Plant Protection', '植物保护', 'Other'], + 'basic_medicine': ['Basic Medicine', '基础医学', 'Other'], + 'clinical_medicine': ['Clinical Medicine', '临床医学', 'Other'], + 'urban_and_rural_planner': ['Urban and Rural Planner', '注册城乡规划师', 'Other'], + 'accountant': ['Accountant', '注册会计师', 'Other'], + 'fire_engineer': ['Fire Engineer', '注册消防工程师', 'Other'], + 'environmental_impact_assessment_engineer': ['Environmental Impact Assessment Engineer', '环境影响评价工程师', 'Other'], + 'tax_accountant': ['Tax Accountant', '税务师', 'Other'], + 'physician': ['Physician', '医师资格', 'Other'], } ceval_all_sets = list(ceval_subject_mapping.keys()) diff --git a/configs/datasets/ceval/ceval_zero_shot_gen_bd40ef.py b/configs/datasets/ceval/ceval_zero_shot_gen_bd40ef.py new file mode 100644 index 00000000..5cb922d3 --- /dev/null +++ b/configs/datasets/ceval/ceval_zero_shot_gen_bd40ef.py @@ -0,0 +1,105 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import FixKRetriever, ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import CEvalDataset +from opencompass.utils.text_postprocessors import first_option_postprocess + +ceval_subject_mapping = { + 'computer_network': ['Computer Network', '计算机网络', 'STEM'], + 'operating_system': ['Operating System', '操作系统', 'STEM'], + 'computer_architecture': ['Computer Architecture', '计算机组成', 'STEM'], + 'college_programming': ['College Programming', '大学编程', 'STEM'], + 'college_physics': ['College Physics', '大学物理', 'STEM'], + 'college_chemistry': ['College Chemistry', '大学化学', 'STEM'], + 'advanced_mathematics': ['Advanced Mathematics', '高等数学', 'STEM'], + 'probability_and_statistics': ['Probability and Statistics', '概率统计', 'STEM'], + 'discrete_mathematics': ['Discrete Mathematics', '离散数学', 'STEM'], + 'electrical_engineer': ['Electrical Engineer', '注册电气工程师', 'STEM'], + 'metrology_engineer': ['Metrology Engineer', '注册计量师', 'STEM'], + 'high_school_mathematics': ['High School Mathematics', '高中数学', 'STEM'], + 'high_school_physics': ['High School Physics', '高中物理', 'STEM'], + 'high_school_chemistry': ['High School Chemistry', '高中化学', 'STEM'], + 'high_school_biology': ['High School Biology', '高中生物', 'STEM'], + 'middle_school_mathematics': ['Middle School Mathematics', '初中数学', 'STEM'], + 'middle_school_biology': ['Middle School Biology', '初中生物', 'STEM'], + 'middle_school_physics': ['Middle School Physics', '初中物理', 'STEM'], + 'middle_school_chemistry': ['Middle School Chemistry', '初中化学', 'STEM'], + 'veterinary_medicine': ['Veterinary Medicine', '兽医学', 'STEM'], + 'college_economics': ['College Economics', '大学经济学', 'Social Science'], + 'business_administration': ['Business Administration', '工商管理', 'Social Science'], + 'marxism': ['Marxism', '马克思主义基本原理', 'Social Science'], + 'mao_zedong_thought': ['Mao Zedong Thought', '毛泽东思想和中国特色社会主义理论体系概论', 'Social Science'], + 'education_science': ['Education Science', '教育学', 'Social Science'], + 'teacher_qualification': ['Teacher Qualification', '教师资格', 'Social Science'], + 'high_school_politics': ['High School Politics', '高中政治', 'Social Science'], + 'high_school_geography': ['High School Geography', '高中地理', 'Social Science'], + 'middle_school_politics': ['Middle School Politics', '初中政治', 'Social Science'], + 'middle_school_geography': ['Middle School Geography', '初中地理', 'Social Science'], + 'modern_chinese_history': ['Modern Chinese History', '近代史纲要', 'Humanities'], + 'ideological_and_moral_cultivation': ['Ideological and Moral Cultivation', '思想道德修养与法律基础', 'Humanities'], + 'logic': ['Logic', '逻辑学', 'Humanities'], + 'law': ['Law', '法学', 'Humanities'], + 'chinese_language_and_literature': ['Chinese Language and Literature', '中国语言文学', 'Humanities'], + 'art_studies': ['Art Studies', '艺术学', 'Humanities'], + 'professional_tour_guide': ['Professional Tour Guide', '导游资格', 'Humanities'], + 'legal_professional': ['Legal Professional', '法律职业资格', 'Humanities'], + 'high_school_chinese': ['High School Chinese', '高中语文', 'Humanities'], + 'high_school_history': ['High School History', '高中历史', 'Humanities'], + 'middle_school_history': ['Middle School History', '初中历史', 'Humanities'], + 'civil_servant': ['Civil Servant', '公务员', 'Other'], + 'sports_science': ['Sports Science', '体育学', 'Other'], + 'plant_protection': ['Plant Protection', '植物保护', 'Other'], + 'basic_medicine': ['Basic Medicine', '基础医学', 'Other'], + 'clinical_medicine': ['Clinical Medicine', '临床医学', 'Other'], + 'urban_and_rural_planner': ['Urban and Rural Planner', '注册城乡规划师', 'Other'], + 'accountant': ['Accountant', '注册会计师', 'Other'], + 'fire_engineer': ['Fire Engineer', '注册消防工程师', 'Other'], + 'environmental_impact_assessment_engineer': ['Environmental Impact Assessment Engineer', '环境影响评价工程师', 'Other'], + 'tax_accountant': ['Tax Accountant', '税务师', 'Other'], + 'physician': ['Physician', '医师资格', 'Other'], +} +ceval_all_sets = list(ceval_subject_mapping.keys()) + +ceval_datasets = [] +for _split in ["val"]: + for _name in ceval_all_sets: + _ch_name = ceval_subject_mapping[_name][1] + ceval_infer_cfg = dict( + ice_template=dict( + type=PromptTemplate, + template=dict( + begin="", + round=[ + dict( + role="HUMAN", + prompt= + f"以下是中国关于{_ch_name}考试的单项选择题,请选出其中的正确答案。\n{{question}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n让我们一步一步思考。答案: " + ), + dict(role="BOT", prompt="{answer}"), + ]), + ice_token="", + ), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer, max_out_len=256), + ) + + ceval_eval_cfg = dict( + evaluator=dict(type=AccEvaluator), + pred_postprocessor=dict(type=first_option_postprocess, options='ABCD')) + + ceval_datasets.append( + dict( + type=CEvalDataset, + path="./data/ceval/formal_ceval", + name=_name, + abbr="ceval-" + _name if _split == "val" else "ceval-test-" + + _name, + reader_cfg=dict( + input_columns=["question", "A", "B", "C", "D"], + output_column="answer", + train_split="dev", + test_split=_split), + infer_cfg=ceval_infer_cfg, + eval_cfg=ceval_eval_cfg, + )) diff --git a/configs/datasets/commonsenseqa/commonsenseqa_ppl_c49e77.py b/configs/datasets/commonsenseqa/commonsenseqa_ppl_c49e77.py new file mode 100644 index 00000000..ca17a630 --- /dev/null +++ b/configs/datasets/commonsenseqa/commonsenseqa_ppl_c49e77.py @@ -0,0 +1,41 @@ +# Use FixKRetriever to avoid hang caused by the Huggingface +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import FixKRetriever +from opencompass.openicl.icl_inferencer import PPLInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import commonsenseqaDataset + +commonsenseqa_reader_cfg = dict( + input_columns=['question', 'A', 'B', 'C', 'D', 'E'], + output_column='answerKey', + test_split='validation') + +_ice_template = dict( + type=PromptTemplate, + template={ + ans: dict( + begin='', + round=[ + dict(role="HUMAN", prompt="Question: {question}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nE. {E}\nAnswer: "), + dict(role="BOT", prompt=f"{ans}"), + ]) + for ans in ['A', 'B', 'C', 'D', 'E'] + }, + ice_token='') + +commonsenseqa_infer_cfg = dict( + ice_template=_ice_template, + retriever=dict(type=FixKRetriever, fix_id_list=[0, 1, 2, 3, 4, 5, 6, 7]), + inferencer=dict(type=PPLInferencer)) + +commonsenseqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) + +commonsenseqa_datasets = [ + dict( + abbr='commonsense_qa', + type=commonsenseqaDataset, + path='./data/commonsenseqa', + reader_cfg=commonsenseqa_reader_cfg, + infer_cfg=commonsenseqa_infer_cfg, + eval_cfg=commonsenseqa_eval_cfg) +] diff --git a/configs/datasets/ds1000/ds1000_gen_cbc84f.py b/configs/datasets/ds1000/ds1000_gen_cbc84f.py index e21efb0a..ba85e693 100644 --- a/configs/datasets/ds1000/ds1000_gen_cbc84f.py +++ b/configs/datasets/ds1000/ds1000_gen_cbc84f.py @@ -37,7 +37,7 @@ ds1000_datasets = [ dict( abbr=f"ds1000_{lib}", type=DS1000Dataset, - path="ds1000_data/", + path="./data/ds1000_data/", libs=f"{lib}", reader_cfg=ds1000_reader_cfg, infer_cfg=ds1000_infer_cfg, @@ -55,7 +55,7 @@ ds1000_datasets.append( dict( abbr="ds1000_Matplotlib", type=DS1000Dataset, - path="ds1000_data/", + path="./data/ds1000_data/", libs="Matplotlib", reader_cfg=ds1000_reader_cfg, infer_cfg=ds1000_infer_cfg, diff --git a/configs/datasets/ds1000/ds1000_service_eval_gen_cbc84f.py b/configs/datasets/ds1000/ds1000_service_eval_gen_cbc84f.py new file mode 100644 index 00000000..12e9a5c2 --- /dev/null +++ b/configs/datasets/ds1000/ds1000_service_eval_gen_cbc84f.py @@ -0,0 +1,67 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.datasets import DS1000Dataset, DS1000ServiceEvaluator + +ds1000_reader_cfg = dict( + input_columns=["prompt"], + output_column="test_column", + train_split='test', + test_split='test') + +ds1000_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template=dict(round=[ + dict( + role="HUMAN", + prompt="{prompt}", + ), + ]), + ), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer), +) + +ds1000_eval_cfg_dict = { + lib: dict( + evaluator=dict( + type=DS1000ServiceEvaluator, + lib=lib, + ip_address= + "localhost", # replace to your code_eval_server ip_address, port + port=5000 + ), + pred_role="BOT") + for lib in [ + 'Pandas', + 'Numpy', + 'Tensorflow', + 'Scipy', + 'Sklearn', + 'Pytorch', + 'Matplotlib', + ] +} + +# The DS-1000 dataset can be downloaded from +# https://github.com/HKUNLP/DS-1000/blob/main/ds1000_data.zip +ds1000_datasets = [ + dict( + abbr=f"ds1000_{lib}", + type=DS1000Dataset, + path="./data/ds1000_data/", + libs=f"{lib}", + reader_cfg=ds1000_reader_cfg, + infer_cfg=ds1000_infer_cfg, + eval_cfg=ds1000_eval_cfg_dict[lib], + ) for lib in [ + 'Pandas', + 'Numpy', + 'Tensorflow', + 'Scipy', + 'Sklearn', + 'Pytorch', + 'Matplotlib', + ] +] diff --git a/configs/datasets/hellaswag/hellaswag_ppl_7d7f2d.py b/configs/datasets/hellaswag/hellaswag_ppl_7d7f2d.py new file mode 100644 index 00000000..713cf187 --- /dev/null +++ b/configs/datasets/hellaswag/hellaswag_ppl_7d7f2d.py @@ -0,0 +1,33 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import PPLInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import hellaswagDataset_V2 + +hellaswag_reader_cfg = dict( + input_columns=['query', 'A', 'B', 'C', 'D'], + output_column='label') + +hellaswag_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template={ + ans: dict(round=[ + dict(role="HUMAN", prompt="{ctx}\nQuestion: Which ending makes the most sense?\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer: "), + dict(role="BOT", prompt=f"{ans}"), + ]) for ans in ['A', 'B', 'C', 'D'] + }), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=PPLInferencer)) + +hellaswag_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) + +hellaswag_datasets = [ + dict( + abbr='hellaswag', + type=hellaswagDataset_V2, + path='./data/hellaswag/hellaswag.jsonl', + reader_cfg=hellaswag_reader_cfg, + infer_cfg=hellaswag_infer_cfg, + eval_cfg=hellaswag_eval_cfg) +] diff --git a/configs/datasets/mmlu/mmlu_zero_shot_gen_47e2c0.py b/configs/datasets/mmlu/mmlu_zero_shot_gen_47e2c0.py new file mode 100644 index 00000000..4cd2887b --- /dev/null +++ b/configs/datasets/mmlu/mmlu_zero_shot_gen_47e2c0.py @@ -0,0 +1,123 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import FixKRetriever, ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import MMLUDataset +from opencompass.utils.text_postprocessors import first_option_postprocess + +# None of the mmlu dataset in huggingface is correctly parsed, so we use our own dataset reader +# Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar + +mmlu_reader_cfg = dict( + input_columns=["input", "A", "B", "C", "D"], + output_column="target", + train_split='dev') + +mmlu_all_sets = [ + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_physics", + "electrical_engineering", + "astronomy", + "anatomy", + "abstract_algebra", + "machine_learning", + "clinical_knowledge", + "global_facts", + "management", + "nutrition", + "marketing", + "professional_accounting", + "high_school_geography", + "international_law", + "moral_scenarios", + "computer_security", + "high_school_microeconomics", + "professional_law", + "medical_genetics", + "professional_psychology", + "jurisprudence", + "world_religions", + "philosophy", + "virology", + "high_school_chemistry", + "public_relations", + "high_school_macroeconomics", + "human_sexuality", + "elementary_mathematics", + "high_school_physics", + "high_school_computer_science", + "high_school_european_history", + "business_ethics", + "moral_disputes", + "high_school_statistics", + "miscellaneous", + "formal_logic", + "high_school_government_and_politics", + "prehistory", + "security_studies", + "high_school_biology", + "logical_fallacies", + "high_school_world_history", + "professional_medicine", + "high_school_mathematics", + "college_medicine", + "high_school_us_history", + "sociology", + "econometrics", + "high_school_psychology", + "human_aging", + "us_foreign_policy", + "conceptual_physics", +] + + +mmlu_datasets = [] +for _name in mmlu_all_sets: + _hint = f'There is a single choice question about {_name.replace("_", " ")}. Answer the question by replying A, B, C or D.' + mmlu_infer_cfg = dict( + ice_template=dict( + type=PromptTemplate, + template=dict(round=[ + dict( + role="HUMAN", + prompt= + f"{_hint}\nQ: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nA: " + ), + dict(role="BOT", prompt="{target}\n") + ]), + ), + prompt_template=dict( + type=PromptTemplate, + template=dict( + begin="", + round=[ + dict( + role="HUMAN", + prompt= + f"{_hint}\nQ: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nLet's think step by step. A: " + ), + ], + ), + ice_token="", + ), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer, max_out_len=256), + ) + + mmlu_eval_cfg = dict( + evaluator=dict(type=AccEvaluator), + pred_postprocessor=dict(type=first_option_postprocess, options='ABCD')) + + mmlu_datasets.append( + dict( + abbr=f"lukaemon_mmlu_{_name}", + type=MMLUDataset, + path="./data/mmlu/", + name=_name, + reader_cfg=mmlu_reader_cfg, + infer_cfg=mmlu_infer_cfg, + eval_cfg=mmlu_eval_cfg, + )) diff --git a/configs/models/bluelm/hf_bluelm_7b_base.py b/configs/models/bluelm/hf_bluelm_7b_base.py new file mode 100644 index 00000000..d7689864 --- /dev/null +++ b/configs/models/bluelm/hf_bluelm_7b_base.py @@ -0,0 +1,24 @@ +from opencompass.models import HuggingFaceCausalLM + +models = [ + dict( + type=HuggingFaceCausalLM, + abbr='bluelm-7b-base-hf', + path="vivo-ai/BlueLM-7B-Base", + tokenizer_path='vivo-ai/BlueLM-7B-Base', + model_kwargs=dict( + device_map='auto', + trust_remote_code=True, + ), + tokenizer_kwargs=dict( + padding_side='left', + truncation_side='left', + trust_remote_code=True, + use_fast=False, + ), + max_out_len=100, + max_seq_len=2048, + batch_size=8, + run_cfg=dict(num_gpus=1, num_procs=1), + ) +] diff --git a/configs/models/bluelm/hf_bluelm_7b_base_32k.py b/configs/models/bluelm/hf_bluelm_7b_base_32k.py new file mode 100644 index 00000000..f319456f --- /dev/null +++ b/configs/models/bluelm/hf_bluelm_7b_base_32k.py @@ -0,0 +1,24 @@ +from opencompass.models import HuggingFaceCausalLM + +models = [ + dict( + type=HuggingFaceCausalLM, + abbr='bluelm-7b-base-32k-hf', + path="vivo-ai/BlueLM-7B-Base-32K", + tokenizer_path='vivo-ai/BlueLM-7B-Base-32K', + model_kwargs=dict( + device_map='auto', + trust_remote_code=True, + ), + tokenizer_kwargs=dict( + padding_side='left', + truncation_side='left', + trust_remote_code=True, + use_fast=False, + ), + max_out_len=100, + max_seq_len=4096, + batch_size=8, + run_cfg=dict(num_gpus=1, num_procs=1), + ) +] diff --git a/configs/models/bluelm/hf_bluelm_7b_chat.py b/configs/models/bluelm/hf_bluelm_7b_chat.py new file mode 100644 index 00000000..31612ccf --- /dev/null +++ b/configs/models/bluelm/hf_bluelm_7b_chat.py @@ -0,0 +1,32 @@ +from opencompass.models import HuggingFaceCausalLM + +_meta_template = dict( + round=[ + dict(role='HUMAN', begin='[|Human|]:'), + dict(role='BOT', begin='[|AI|]:', generate=True), + ], +) + +models = [ + dict( + type=HuggingFaceCausalLM, + abbr='bluelm-7b-chat-hf', + path="vivo-ai/BlueLM-7B-Chat", + tokenizer_path='vivo-ai/BlueLM-7B-Chat', + model_kwargs=dict( + device_map='auto', + trust_remote_code=True, + ), + tokenizer_kwargs=dict( + padding_side='left', + truncation_side='left', + trust_remote_code=True, + use_fast=False, + ), + meta_template=_meta_template, + max_out_len=100, + max_seq_len=2048, + batch_size=8, + run_cfg=dict(num_gpus=1, num_procs=1), + ) +] diff --git a/configs/models/bluelm/hf_bluelm_7b_chat_32k.py b/configs/models/bluelm/hf_bluelm_7b_chat_32k.py new file mode 100644 index 00000000..9ec1f3a0 --- /dev/null +++ b/configs/models/bluelm/hf_bluelm_7b_chat_32k.py @@ -0,0 +1,32 @@ +from opencompass.models import HuggingFaceCausalLM + +_meta_template = dict( + round=[ + dict(role='HUMAN', begin='[|Human|]:'), + dict(role='BOT', begin='[|AI|]:', generate=True), + ], +) + +models = [ + dict( + type=HuggingFaceCausalLM, + abbr='bluelm-7b-chat-32k-hf', + path="vivo-ai/BlueLM-7B-Chat-32K", + tokenizer_path='vivo-ai/BlueLM-7B-Chat-32K', + model_kwargs=dict( + device_map='auto', + trust_remote_code=True, + ), + tokenizer_kwargs=dict( + padding_side='left', + truncation_side='left', + trust_remote_code=True, + use_fast=False, + ), + meta_template=_meta_template, + max_out_len=100, + max_seq_len=4096, + batch_size=8, + run_cfg=dict(num_gpus=1, num_procs=1), + ) +] diff --git a/configs/models/nanbeige/hf_nanbeige_16b_base.py b/configs/models/nanbeige/hf_nanbeige_16b_base.py new file mode 100644 index 00000000..322f18a4 --- /dev/null +++ b/configs/models/nanbeige/hf_nanbeige_16b_base.py @@ -0,0 +1,33 @@ +from opencompass.models import HuggingFaceCausalLM + +_meta_template = dict( + round=[ + dict(role='HUMAN', begin='', end=''), + dict(role='BOT', begin='', end='\n\n', generate=True), + ], +) + +models = [ + dict( + abbr='nanbeige-16b-base-hf', + type=HuggingFaceCausalLM, + path='Nanbeige/Nanbeige-16B-Base', + tokenizer_path='Nanbeige/Nanbeige-16B-Base', + model_kwargs=dict( + device_map='auto', + trust_remote_code=True, + torch_dtype='auto', + ), + tokenizer_kwargs=dict( + padding_side='right', + truncation_side='left', + trust_remote_code=True + ), + meta_template=_meta_template, + batch_padding=False, + max_out_len=1024, + max_seq_len=4096, + batch_size=1, + run_cfg=dict(num_gpus=1, num_procs=1), + ) +] diff --git a/configs/models/nanbeige/hf_nanbeige_16b_base_32k.py b/configs/models/nanbeige/hf_nanbeige_16b_base_32k.py new file mode 100644 index 00000000..d0c1c2ea --- /dev/null +++ b/configs/models/nanbeige/hf_nanbeige_16b_base_32k.py @@ -0,0 +1,34 @@ +from opencompass.models import HuggingFaceCausalLM + +_meta_template = dict( + round=[ + dict(role='HUMAN', begin='', end=''), + dict(role='BOT', begin='', end='\n\n', generate=True), + ], +) + +models = [ + dict( + type=HuggingFaceCausalLM, + abbr='nanbeige-16b-base-32k-hf', + path="Nanbeige/Nanbeige-16B-Base-32K", + tokenizer_path='Nanbeige/Nanbeige-16B-Base-32K', + model_kwargs=dict( + device_map='auto', + trust_remote_code=True, + torch_dtype='auto', + ), + tokenizer_kwargs=dict( + padding_side='right', + truncation_side='left', + trust_remote_code=True, + use_fast=False, + ), + meta_template=_meta_template, + batch_padding=False, + max_out_len=1024, + max_seq_len=8192, + batch_size=8, + run_cfg=dict(num_gpus=1, num_procs=1), + ) +] diff --git a/configs/models/nanbeige/hf_nanbeige_16b_chat.py b/configs/models/nanbeige/hf_nanbeige_16b_chat.py new file mode 100644 index 00000000..9eb545d4 --- /dev/null +++ b/configs/models/nanbeige/hf_nanbeige_16b_chat.py @@ -0,0 +1,34 @@ +from opencompass.models import HuggingFaceCausalLM + +_meta_template = dict( + round=[ + dict(role='HUMAN', begin='### Human: \n', end='\n\n'), + dict(role='BOT', begin='### Assistant: ', end='', generate=True), + ], +) + +models = [ + dict( + type=HuggingFaceCausalLM, + abbr='nanbeige-16b-chat-hf', + path="Nanbeige/Nanbeige-16B-Chat", + tokenizer_path='Nanbeige/Nanbeige-16B-Chat', + model_kwargs=dict( + device_map='auto', + trust_remote_code=True, + torch_dtype='auto', + ), + tokenizer_kwargs=dict( + padding_side='right', + truncation_side='left', + trust_remote_code=True, + use_fast=False, + ), + meta_template=_meta_template, + batch_padding=False, + max_out_len=1024, + max_seq_len=4096, + batch_size=8, + run_cfg=dict(num_gpus=1, num_procs=1), + ) +] diff --git a/configs/models/nanbeige/hf_nanbeige_16b_chat_32k.py b/configs/models/nanbeige/hf_nanbeige_16b_chat_32k.py new file mode 100644 index 00000000..9ee6eb61 --- /dev/null +++ b/configs/models/nanbeige/hf_nanbeige_16b_chat_32k.py @@ -0,0 +1,34 @@ +from opencompass.models import HuggingFaceCausalLM + +_meta_template = dict( + round=[ + dict(role='HUMAN', begin='### Human: \n', end='\n\n'), + dict(role='BOT', begin='### Assistant: ', end='', generate=True), + ], +) + +models = [ + dict( + type=HuggingFaceCausalLM, + abbr='nanbeige-16b-chat-32k-hf', + path="Nanbeige/Nanbeige-16B-Chat-32K", + tokenizer_path='Nanbeige/Nanbeige-16B-Chat-32K', + model_kwargs=dict( + device_map='auto', + trust_remote_code=True, + torch_dtype='auto', + ), + tokenizer_kwargs=dict( + padding_side='right', + truncation_side='left', + trust_remote_code=True, + use_fast=False, + ), + meta_template=_meta_template, + batch_padding=False, + max_out_len=1024, + max_seq_len=8192, + batch_size=8, + run_cfg=dict(num_gpus=1, num_procs=1), + ) +] diff --git a/configs/models/others/hf_dolphin_21_mistral_7b.py b/configs/models/others/hf_dolphin_21_mistral_7b.py new file mode 100644 index 00000000..ecc0b196 --- /dev/null +++ b/configs/models/others/hf_dolphin_21_mistral_7b.py @@ -0,0 +1,33 @@ +from opencompass.models import HuggingFaceCausalLM + + +_meta_template = dict( + round=[ + dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'), + dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True), + ], + eos_token_id=2 +) + +models = [ + dict( + abbr='dolphin-2.2.1-mistral-7b-hf', + type=HuggingFaceCausalLM, + path='ehartford/dolphin-2.2.1-mistral-7b', + tokenizer_path='ehartford/dolphin-2.2.1-mistral-7b', + model_kwargs=dict( + device_map='auto', + trust_remote_code=True, + ), + tokenizer_kwargs=dict( + padding_side='left', + truncation_side='left', + trust_remote_code=True, + ), + meta_template=_meta_template, + max_out_len=100, + max_seq_len=2048, + batch_size=8, + run_cfg=dict(num_gpus=1, num_procs=1), + ) +] diff --git a/configs/models/others/hf_fashiongpt_70b_v11.py b/configs/models/others/hf_fashiongpt_70b_v11.py new file mode 100644 index 00000000..dbb2d7e4 --- /dev/null +++ b/configs/models/others/hf_fashiongpt_70b_v11.py @@ -0,0 +1,33 @@ +from opencompass.models import HuggingFaceCausalLM + + +_meta_template = dict( + round=[ + dict(role="HUMAN", begin='### User:\n', end='\n'), + dict(role="BOT", begin="### Assistant:\n", generate=True), + ], + eos_token_id=2 +) + +models = [ + dict( + abbr='fashiongpt-70b-v11-hf', + type=HuggingFaceCausalLM, + path='ICBU-NPU/FashionGPT-70B-V1.1', + tokenizer_path='ICBU-NPU/FashionGPT-70B-V1.1', + model_kwargs=dict( + device_map='auto', + trust_remote_code=True, + ), + tokenizer_kwargs=dict( + padding_side='left', + truncation_side='left', + trust_remote_code=True, + ), + meta_template=_meta_template, + max_out_len=100, + max_seq_len=2048, + batch_size=8, + run_cfg=dict(num_gpus=8, num_procs=1), + ) +] diff --git a/configs/models/others/hf_orionstar_yi_34b_chat.py b/configs/models/others/hf_orionstar_yi_34b_chat.py new file mode 100644 index 00000000..0ad59e74 --- /dev/null +++ b/configs/models/others/hf_orionstar_yi_34b_chat.py @@ -0,0 +1,34 @@ +from opencompass.models import HuggingFaceCausalLM + + +_meta_template = dict( + begin='<|startoftext|>', + round=[ + dict(role="HUMAN", begin='Human: ', end='\n\n'), + dict(role="BOT", begin="Assistant: <|endoftext|>", end='<|endoftext|>', generate=True), + ], + eos_token_id=2 +) + +models = [ + dict( + abbr='orionstar-yi-34b-chat-hf', + type=HuggingFaceCausalLM, + path='OrionStarAI/OrionStar-Yi-34B-Chat', + tokenizer_path='OrionStarAI/OrionStar-Yi-34B-Chat', + model_kwargs=dict( + device_map='auto', + trust_remote_code=True, + ), + tokenizer_kwargs=dict( + padding_side='left', + truncation_side='left', + trust_remote_code=True, + ), + meta_template=_meta_template, + max_out_len=100, + max_seq_len=2048, + batch_size=8, + run_cfg=dict(num_gpus=4, num_procs=1), + ) +] diff --git a/configs/summarizers/groups/ds1000.py b/configs/summarizers/groups/ds1000.py new file mode 100644 index 00000000..2dde8981 --- /dev/null +++ b/configs/summarizers/groups/ds1000.py @@ -0,0 +1,5 @@ +ds1000_summary_groups = [] + +_ds1000_all = ['Pandas', 'Numpy', 'Tensorflow', 'Scipy', 'Sklearn', 'Pytorch', 'Matplotlib'] +_ds1000_all = ['ds1000_' + d for d in _ds1000_all] +ds1000_summary_groups.append({'name': 'ds1000', 'subsets': _ds1000_all}) diff --git a/opencompass/datasets/ds1000.py b/opencompass/datasets/ds1000.py index 9a162677..c245f508 100644 --- a/opencompass/datasets/ds1000.py +++ b/opencompass/datasets/ds1000.py @@ -1,15 +1,19 @@ import configparser import importlib +import json import os +import os.path as osp import pickle import re import shutil import signal +import subprocess import sys import tempfile import threading from concurrent.futures import ProcessPoolExecutor from pathlib import Path +from shutil import copyfile from subprocess import PIPE, Popen from typing import Optional, Union @@ -20,6 +24,11 @@ from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS from .base import BaseDataset +_LIBRARY_NAME_LIST = [ + 'Pandas', 'Numpy', 'Tensorflow', 'Scipy', 'Sklearn', 'Pytorch', + 'Matplotlib' +] + @LOAD_DATASET.register_module() class DS1000Dataset(BaseDataset): @@ -323,3 +332,98 @@ def import_source_file(fname, modname): except FileNotFoundError as e: raise ImportError(f'{e.strerror}: {fname}') from e return module + + +class DS1000ServiceEvaluator(BaseEvaluator): + """Evaluator for ds1000 eval by using a service. + + Before you use this Evaluator, launch a code eval service according to: + https://opencompass.readthedocs.io/en/latest/advanced_guides/code_eval_service.html + + Args: + lib (str): The library to be evaluated. + ip_address (str): The IP Address of DS1000 code evaluate service. + Defaults to 'localhost'. + port (int): The port of DS1000 code evaluate service. + Defaults to 5000. + timeout (int): Maximum wait time when accessing the service, + Defaults to 100. + """ + + def __init__(self, + lib: str, + ip_address='localhost', + port=5000, + timeout=180) -> None: + assert lib in _LIBRARY_NAME_LIST, ( + f' lib must be in {_LIBRARY_NAME_LIST}') + self.lib = lib + self.ip_address = ip_address + self.port = port + self.timeout = timeout + super().__init__() + + def score(self, predictions, references): + processed_predictions = {} + assert len(predictions) == len(references) + for i, (pred, gold) in enumerate(zip(predictions, references)): + processed_predictions[str(i)] = {'prediction': pred, 'gold': gold} + + with tempfile.TemporaryDirectory() as tmp_dir: + tmp_out_path = osp.join(tmp_dir, f'ds1000_{self.lib}.json') + with open(tmp_out_path, 'w', encoding='utf-8') as json_file: + json.dump(processed_predictions, + json_file, + indent=4, + ensure_ascii=False) + + succeed, output = self._code_eval_service(file_path=tmp_out_path) + if succeed: + if isinstance(output, str): + return json.loads(output) + elif isinstance(output, dict): + return output + else: + result_file_path = os.path.join('outputs', + f'ds1000_{self.lib}.json') + copyfile(tmp_out_path, result_file_path) + ref_url = 'https://opencompass.readthedocs.io/en/latest/advanced_guides/code_eval_service.html' # noqa + raise Exception( + 'Call CodeEvalService Error in `DS1000ServiceEvaluator`, ' + 'The results have been saved in path ' + f"'{result_file_path}'. You need to check that your " + 'code evaluate service is launched and the network to ' + 'service is connected, you can also get results directly ' + f'by using `curl` command refer to {ref_url}.' + f'\nError Information: {output}') + + def _code_eval_service(self, file_path: str) -> tuple: + """Access the code eval service. + + Args: + file_path (str): The file path to the file to be evaluated. + + Returns: + tuple[bool, str]: Whether the access is successful and the output. + """ + exec_result = subprocess.run([ + 'curl', '-X', 'POST', '-F', f'file=@{file_path}', + f'{self.ip_address}:{self.port}/evaluate' + ], + timeout=self.timeout, + capture_output=True) + if exec_result.returncode == 0 and re.match( + "\"{.*:.*}\"", exec_result.stdout.decode('utf-8')): + return True, json.loads(exec_result.stdout.decode('utf-8')) + else: + if exec_result.stderr: + try: + err = exec_result.stderr.decode() + except Exception: + err = exec_result.stderr + else: + try: + err = exec_result.stdout.decode() + except Exception: + err = exec_result.stdout + return False, err diff --git a/opencompass/datasets/humaneval.py b/opencompass/datasets/humaneval.py index 1cd1b77b..e01f20a2 100644 --- a/opencompass/datasets/humaneval.py +++ b/opencompass/datasets/humaneval.py @@ -93,6 +93,7 @@ def humaneval_postprocess(text: str) -> str: if def_idx != -1: text = text[max(text.find('\n', def_idx) + 1, 0):] text = text.split('\n\n')[0] + text = text.lstrip('\n') if text.strip().startswith('def'): text = '\n'.join(text.split('\n')[1:]) if not text.startswith(' '): diff --git a/opencompass/datasets/mbpp.py b/opencompass/datasets/mbpp.py index fd8f2125..d1f05849 100644 --- a/opencompass/datasets/mbpp.py +++ b/opencompass/datasets/mbpp.py @@ -127,7 +127,9 @@ class MBPPEvaluator(BaseEvaluator): predictions = [self._process_answer(pred) for pred in predictions] result = {'pass': 0, 'timeout': 0, 'failed': 0, 'wrong_answer': 0} - for test_case, pred in zip(references, predictions): + details = {} + for index, (test_case, pred) in enumerate(zip(references, + predictions)): programs = self._process_test(test_case, pred) try: # Add exec globals to prevent the exec to raise @@ -136,15 +138,18 @@ class MBPPEvaluator(BaseEvaluator): with swallow_io(): with time_limit(2): exec(programs, exec_globals) - result['pass'] += 1 + r = 'pass' except TimeOutException: - result['timeout'] += 1 + r = 'timeout' except AssertionError: - result['wrong_answer'] += 1 + r = 'wrong_answer' except BaseException: - result['failed'] += 1 + r = 'failed' + result[r] += 1 + details[str(index)] = {'programs': programs, 'result': r} result['score'] = result['pass'] / len(predictions) * 100 + result['details'] = details return result def _process_answer(self, text): diff --git a/opencompass/summarizers/default.py b/opencompass/summarizers/default.py index 8ae4e896..4d39825b 100644 --- a/opencompass/summarizers/default.py +++ b/opencompass/summarizers/default.py @@ -147,26 +147,26 @@ class DefaultSummarizer: if all(isinstance(dataset_abbr, (list, tuple)) for dataset_abbr in sg['subsets']): group_metrics = [default_metric] for dataset_abbr, metric in sg['subsets']: - scores.setdefault(default_metric, []).append(parsed_results[model_abbr][dataset_abbr][metric]) + scores.setdefault(default_metric, {})[dataset_abbr] = parsed_results[model_abbr][dataset_abbr][metric] eval_modes.append(dataset_eval_mode.get(dataset_abbr, 'unknown')) else: group_metrics = list(functools.reduce(lambda a, b: a & b, [set(dataset_metrics[dataset_abbr]) for dataset_abbr in sg['subsets']])) if len(group_metrics) > 1: for metric in group_metrics: for dataset_abbr in sg['subsets']: - scores.setdefault(metric, []).append(parsed_results[model_abbr][dataset_abbr][metric]) + scores.setdefault(metric, {})[dataset_abbr] = parsed_results[model_abbr][dataset_abbr][metric] eval_modes.append(dataset_eval_mode.get(sg['subsets'][0], 'unknown')) else: group_metrics = [default_metric] for dataset_abbr in sg['subsets']: metric = dataset_metrics[dataset_abbr][0] - scores.setdefault(default_metric, []).append(parsed_results[model_abbr][dataset_abbr][metric]) + scores.setdefault(default_metric, {})[dataset_abbr] = parsed_results[model_abbr][dataset_abbr][metric] eval_modes.append(dataset_eval_mode.get(dataset_abbr, 'unknown')) result = {} for metric in scores: if default_metric == 'standard_deviation': - avg = sum(scores[metric]) / len(scores[metric]) + avg = sum(scores[metric].values()) / len(scores[metric]) variance = sum((k - avg) ** 2 for k in scores[metric]) / len(scores[metric]) scores[metric] = result[metric] = math.sqrt(variance) else: @@ -174,7 +174,7 @@ class DefaultSummarizer: numerator = sum(scores[metric][k] * sg['weights'][k] for k in sg['weights']) denominator = sum(sg['weights'].values()) else: - numerator = sum(scores[metric]) + numerator = sum(scores[metric].values()) denominator = len(scores[metric]) scores[metric] = result[metric] = numerator / denominator eval_modes = list(set(eval_modes)) diff --git a/opencompass/utils/text_postprocessors.py b/opencompass/utils/text_postprocessors.py index dfd1cfe5..60e59a65 100644 --- a/opencompass/utils/text_postprocessors.py +++ b/opencompass/utils/text_postprocessors.py @@ -51,19 +51,53 @@ def first_capital_postprocess(text: str) -> str: def first_option_postprocess(text: str, options: str) -> str: """Find first valid option for text.""" + # yapf: disable + # flake8: noqa: W605 patterns = [ - f'[Tt]he answer is [{options}]', - f'[Tt]he correct answer\s?(?:option)?\s?is [{options}]', # noqa - f'答案(?:选项)?是(.*?)[{options}]', - f'答案(?:选项)?为(.*?)[{options}]', - f'答案(?:选项)?选(.*?)[{options}]', - f'选项[{options}]是?正确', - f'选项[{options}]为?正确', - f'固选(.*?)[{options}]', - f'答案应该是(.*?)[{options}]', - f'(\s|^)[{options}][\s。,,\.$]', # noqa + f'答案是?\s?([{options}])', + f'答案是?\s?:([{options}])', + f'答案是?\s?:([{options}])', + f'答案应该?是\s?([{options}])', + f'答案应该?选\s?([{options}])', + f'答案为\s?([{options}])', + f'答案选\s?([{options}])', + f'选择?\s?([{options}])', + f'只有选?项?\s?([{options}])\s?是?对', + f'只有选?项?\s?([{options}])\s?是?错', + f'只有选?项?\s?([{options}])\s?不?正确', + f'只有选?项?\s?([{options}])\s?错误', + f'说法不?对选?项?的?是\s?([{options}])', + f'说法不?正确选?项?的?是\s?([{options}])', + f'说法错误选?项?的?是\s?([{options}])', + f'([{options}])\s?是正确的', + f'([{options}])\s?是正确答案', + f'选项\s?([{options}])\s?正确', + f'所以答\s?([{options}])', + f'1.\s?([{options}])[.。$]?$', + f'所以\s?([{options}][.。$]?$)', + f'所有\s?([{options}][.。$]?$)', + f'[\s,::,]([{options}])[。,,\.]?$', + f'[\s,,::][故即]([{options}])[。\.]?$', + f'[\s,,::]因此([{options}])[。\.]?$', + f'[是为。]\s?([{options}])[。\.]?$', + f'因此\s?([{options}])[。\.]?$', + f'显然\s?([{options}])[。\.]?$', + f'1.\s?(.*?)$', + f'答案是\s?(\S+)(?:。|$)', + f'答案应该是\s?(\S+)(?:。|$)', + f'答案为\s?(\S+)(?:。|$)', + f'(\s|^)[{options}][\s。,,::\.$]', + f'[Tt]he answer is ([{options}])', + f'[Tt]he answer is option ([{options}])', + f'[Tt]he correct answer is ([{options}])', + f'[Tt]he correct answer is option ([{options}])', + f'[Tt]he answer to the question is ([{options}])', + f'([{options}]):', + f'(^|\s)[{options}](\s|$)', f'[{options}]', ] + # flake8: noqa + # yapf: enable regexes = [re.compile(pattern) for pattern in patterns] for regex in regexes: diff --git a/tools/prompt_viewer.py b/tools/prompt_viewer.py index 99b44922..eddb9b66 100644 --- a/tools/prompt_viewer.py +++ b/tools/prompt_viewer.py @@ -84,20 +84,17 @@ def print_prompts(model_cfg, dataset_cfg, count=1): if infer_cfg.inferencer.type == PPLInferencer: labels = retriever.get_labels(ice_template=ice_template, prompt_template=prompt_template) - ice = [ - retriever.generate_ice(ice_idx_list[_idx], - ice_template=ice_template) - for _idx in range(len(ice_idx_list)) - ] + ice = retriever.generate_ice(ice_idx_list[idx], + ice_template=ice_template) print('-' * 100) print('ICE Template:') print('-' * 100) - print(ice[0]) + print(ice) print('-' * 100) for label in labels: prompt = retriever.generate_label_prompt( idx, - ice[idx], + ice, label, ice_template=ice_template, prompt_template=prompt_template, @@ -111,11 +108,11 @@ def print_prompts(model_cfg, dataset_cfg, count=1): print(f'Truncating ice {num_ice} -> {num_ice - 1}', f'Number of tokens: {prompt_token_num} -> ...') ice_idx_list[idx] = ice_idx_list[idx][:-1] - ice[idx] = retriever.generate_ice( - ice_idx_list[idx], ice_template=ice_template) + ice = retriever.generate_ice(ice_idx_list[idx], + ice_template=ice_template) prompt = retriever.generate_label_prompt( idx, - ice[idx], + ice, label, ice_template=ice_template, prompt_template=prompt_template)