mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Sync] some renaming (#641)
This commit is contained in:
parent
68c4c1ef86
commit
9083dea683
@ -6,139 +6,58 @@ from opencompass.datasets import CEvalDataset
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
ceval_subject_mapping = {
|
||||
"computer_network":
|
||||
["Computer Network", "\u8ba1\u7b97\u673a\u7f51\u7edc", "STEM"],
|
||||
"operating_system":
|
||||
["Operating System", "\u64cd\u4f5c\u7cfb\u7edf", "STEM"],
|
||||
"computer_architecture":
|
||||
["Computer Architecture", "\u8ba1\u7b97\u673a\u7ec4\u6210", "STEM"],
|
||||
"college_programming":
|
||||
["College Programming", "\u5927\u5b66\u7f16\u7a0b", "STEM"],
|
||||
"college_physics": ["College Physics", "\u5927\u5b66\u7269\u7406", "STEM"],
|
||||
"college_chemistry":
|
||||
["College Chemistry", "\u5927\u5b66\u5316\u5b66", "STEM"],
|
||||
"advanced_mathematics":
|
||||
["Advanced Mathematics", "\u9ad8\u7b49\u6570\u5b66", "STEM"],
|
||||
"probability_and_statistics":
|
||||
["Probability and Statistics", "\u6982\u7387\u7edf\u8ba1", "STEM"],
|
||||
"discrete_mathematics":
|
||||
["Discrete Mathematics", "\u79bb\u6563\u6570\u5b66", "STEM"],
|
||||
"electrical_engineer": [
|
||||
"Electrical Engineer", "\u6ce8\u518c\u7535\u6c14\u5de5\u7a0b\u5e08",
|
||||
"STEM"
|
||||
],
|
||||
"metrology_engineer":
|
||||
["Metrology Engineer", "\u6ce8\u518c\u8ba1\u91cf\u5e08", "STEM"],
|
||||
"high_school_mathematics":
|
||||
["High School Mathematics", "\u9ad8\u4e2d\u6570\u5b66", "STEM"],
|
||||
"high_school_physics":
|
||||
["High School Physics", "\u9ad8\u4e2d\u7269\u7406", "STEM"],
|
||||
"high_school_chemistry":
|
||||
["High School Chemistry", "\u9ad8\u4e2d\u5316\u5b66", "STEM"],
|
||||
"high_school_biology": [
|
||||
"High School Biology", "\u9ad8\u4e2d\u751f\u7269", "STEM"
|
||||
],
|
||||
"middle_school_mathematics": [
|
||||
"Middle School Mathematics", "\u521d\u4e2d\u6570\u5b66", "STEM"
|
||||
],
|
||||
"middle_school_biology": [
|
||||
"Middle School Biology", "\u521d\u4e2d\u751f\u7269", "STEM"
|
||||
],
|
||||
"middle_school_physics": [
|
||||
"Middle School Physics", "\u521d\u4e2d\u7269\u7406", "STEM"
|
||||
],
|
||||
"middle_school_chemistry": [
|
||||
"Middle School Chemistry", "\u521d\u4e2d\u5316\u5b66", "STEM"
|
||||
],
|
||||
"veterinary_medicine": [
|
||||
"Veterinary Medicine", "\u517d\u533b\u5b66", "STEM"
|
||||
],
|
||||
"college_economics": [
|
||||
"College Economics", "\u5927\u5b66\u7ecf\u6d4e\u5b66", "Social Science"
|
||||
],
|
||||
"business_administration": [
|
||||
"Business Administration", "\u5de5\u5546\u7ba1\u7406", "Social Science"
|
||||
],
|
||||
"marxism": [
|
||||
"Marxism", "\u9a6c\u514b\u601d\u4e3b\u4e49\u57fa\u672c\u539f\u7406",
|
||||
"Social Science"
|
||||
],
|
||||
"mao_zedong_thought": [
|
||||
"Mao Zedong Thought",
|
||||
"\u6bdb\u6cfd\u4e1c\u601d\u60f3\u548c\u4e2d\u56fd\u7279\u8272\u793e\u4f1a\u4e3b\u4e49\u7406\u8bba\u4f53\u7cfb\u6982\u8bba",
|
||||
"Social Science"
|
||||
],
|
||||
"education_science": [
|
||||
"Education Science", "\u6559\u80b2\u5b66", "Social Science"
|
||||
],
|
||||
"teacher_qualification": [
|
||||
"Teacher Qualification", "\u6559\u5e08\u8d44\u683c", "Social Science"
|
||||
],
|
||||
"high_school_politics": [
|
||||
"High School Politics", "\u9ad8\u4e2d\u653f\u6cbb", "Social Science"
|
||||
],
|
||||
"high_school_geography": [
|
||||
"High School Geography", "\u9ad8\u4e2d\u5730\u7406", "Social Science"
|
||||
],
|
||||
"middle_school_politics": [
|
||||
"Middle School Politics", "\u521d\u4e2d\u653f\u6cbb", "Social Science"
|
||||
],
|
||||
"middle_school_geography": [
|
||||
"Middle School Geography", "\u521d\u4e2d\u5730\u7406", "Social Science"
|
||||
],
|
||||
"modern_chinese_history":
|
||||
["Modern Chinese History", "\u8fd1\u4ee3\u53f2\u7eb2\u8981", "Humanities"],
|
||||
"ideological_and_moral_cultivation": [
|
||||
"Ideological and Moral Cultivation",
|
||||
"\u601d\u60f3\u9053\u5fb7\u4fee\u517b\u4e0e\u6cd5\u5f8b\u57fa\u7840",
|
||||
"Humanities"
|
||||
],
|
||||
"logic": ["Logic", "\u903b\u8f91\u5b66", "Humanities"],
|
||||
"law": ["Law", "\u6cd5\u5b66", "Humanities"],
|
||||
"chinese_language_and_literature": [
|
||||
"Chinese Language and Literature",
|
||||
"\u4e2d\u56fd\u8bed\u8a00\u6587\u5b66", "Humanities"
|
||||
],
|
||||
"art_studies": ["Art Studies", "\u827a\u672f\u5b66", "Humanities"],
|
||||
"professional_tour_guide": [
|
||||
"Professional Tour Guide", "\u5bfc\u6e38\u8d44\u683c", "Humanities"
|
||||
],
|
||||
"legal_professional": [
|
||||
"Legal Professional", "\u6cd5\u5f8b\u804c\u4e1a\u8d44\u683c",
|
||||
"Humanities"
|
||||
],
|
||||
"high_school_chinese": [
|
||||
"High School Chinese", "\u9ad8\u4e2d\u8bed\u6587", "Humanities"
|
||||
],
|
||||
"high_school_history": [
|
||||
"High School History", "\u9ad8\u4e2d\u5386\u53f2", "Humanities"
|
||||
],
|
||||
"middle_school_history": [
|
||||
"Middle School History", "\u521d\u4e2d\u5386\u53f2", "Humanities"
|
||||
],
|
||||
"civil_servant": ["Civil Servant", "\u516c\u52a1\u5458", "Other"],
|
||||
"sports_science": ["Sports Science", "\u4f53\u80b2\u5b66", "Other"],
|
||||
"plant_protection": [
|
||||
"Plant Protection", "\u690d\u7269\u4fdd\u62a4", "Other"
|
||||
],
|
||||
"basic_medicine": ["Basic Medicine", "\u57fa\u7840\u533b\u5b66", "Other"],
|
||||
"clinical_medicine": [
|
||||
"Clinical Medicine", "\u4e34\u5e8a\u533b\u5b66", "Other"
|
||||
],
|
||||
"urban_and_rural_planner": [
|
||||
"Urban and Rural Planner",
|
||||
"\u6ce8\u518c\u57ce\u4e61\u89c4\u5212\u5e08", "Other"
|
||||
],
|
||||
"accountant": ["Accountant", "\u6ce8\u518c\u4f1a\u8ba1\u5e08", "Other"],
|
||||
"fire_engineer": [
|
||||
"Fire Engineer", "\u6ce8\u518c\u6d88\u9632\u5de5\u7a0b\u5e08", "Other"
|
||||
],
|
||||
"environmental_impact_assessment_engineer": [
|
||||
"Environmental Impact Assessment Engineer",
|
||||
"\u73af\u5883\u5f71\u54cd\u8bc4\u4ef7\u5de5\u7a0b\u5e08", "Other"
|
||||
],
|
||||
"tax_accountant": ["Tax Accountant", "\u7a0e\u52a1\u5e08", "Other"],
|
||||
"physician": ["Physician", "\u533b\u5e08\u8d44\u683c", "Other"]
|
||||
'computer_network': ['Computer Network', '计算机网络', 'STEM'],
|
||||
'operating_system': ['Operating System', '操作系统', 'STEM'],
|
||||
'computer_architecture': ['Computer Architecture', '计算机组成', 'STEM'],
|
||||
'college_programming': ['College Programming', '大学编程', 'STEM'],
|
||||
'college_physics': ['College Physics', '大学物理', 'STEM'],
|
||||
'college_chemistry': ['College Chemistry', '大学化学', 'STEM'],
|
||||
'advanced_mathematics': ['Advanced Mathematics', '高等数学', 'STEM'],
|
||||
'probability_and_statistics': ['Probability and Statistics', '概率统计', 'STEM'],
|
||||
'discrete_mathematics': ['Discrete Mathematics', '离散数学', 'STEM'],
|
||||
'electrical_engineer': ['Electrical Engineer', '注册电气工程师', 'STEM'],
|
||||
'metrology_engineer': ['Metrology Engineer', '注册计量师', 'STEM'],
|
||||
'high_school_mathematics': ['High School Mathematics', '高中数学', 'STEM'],
|
||||
'high_school_physics': ['High School Physics', '高中物理', 'STEM'],
|
||||
'high_school_chemistry': ['High School Chemistry', '高中化学', 'STEM'],
|
||||
'high_school_biology': ['High School Biology', '高中生物', 'STEM'],
|
||||
'middle_school_mathematics': ['Middle School Mathematics', '初中数学', 'STEM'],
|
||||
'middle_school_biology': ['Middle School Biology', '初中生物', 'STEM'],
|
||||
'middle_school_physics': ['Middle School Physics', '初中物理', 'STEM'],
|
||||
'middle_school_chemistry': ['Middle School Chemistry', '初中化学', 'STEM'],
|
||||
'veterinary_medicine': ['Veterinary Medicine', '兽医学', 'STEM'],
|
||||
'college_economics': ['College Economics', '大学经济学', 'Social Science'],
|
||||
'business_administration': ['Business Administration', '工商管理', 'Social Science'],
|
||||
'marxism': ['Marxism', '马克思主义基本原理', 'Social Science'],
|
||||
'mao_zedong_thought': ['Mao Zedong Thought', '毛泽东思想和中国特色社会主义理论体系概论', 'Social Science'],
|
||||
'education_science': ['Education Science', '教育学', 'Social Science'],
|
||||
'teacher_qualification': ['Teacher Qualification', '教师资格', 'Social Science'],
|
||||
'high_school_politics': ['High School Politics', '高中政治', 'Social Science'],
|
||||
'high_school_geography': ['High School Geography', '高中地理', 'Social Science'],
|
||||
'middle_school_politics': ['Middle School Politics', '初中政治', 'Social Science'],
|
||||
'middle_school_geography': ['Middle School Geography', '初中地理', 'Social Science'],
|
||||
'modern_chinese_history': ['Modern Chinese History', '近代史纲要', 'Humanities'],
|
||||
'ideological_and_moral_cultivation': ['Ideological and Moral Cultivation', '思想道德修养与法律基础', 'Humanities'],
|
||||
'logic': ['Logic', '逻辑学', 'Humanities'],
|
||||
'law': ['Law', '法学', 'Humanities'],
|
||||
'chinese_language_and_literature': ['Chinese Language and Literature', '中国语言文学', 'Humanities'],
|
||||
'art_studies': ['Art Studies', '艺术学', 'Humanities'],
|
||||
'professional_tour_guide': ['Professional Tour Guide', '导游资格', 'Humanities'],
|
||||
'legal_professional': ['Legal Professional', '法律职业资格', 'Humanities'],
|
||||
'high_school_chinese': ['High School Chinese', '高中语文', 'Humanities'],
|
||||
'high_school_history': ['High School History', '高中历史', 'Humanities'],
|
||||
'middle_school_history': ['Middle School History', '初中历史', 'Humanities'],
|
||||
'civil_servant': ['Civil Servant', '公务员', 'Other'],
|
||||
'sports_science': ['Sports Science', '体育学', 'Other'],
|
||||
'plant_protection': ['Plant Protection', '植物保护', 'Other'],
|
||||
'basic_medicine': ['Basic Medicine', '基础医学', 'Other'],
|
||||
'clinical_medicine': ['Clinical Medicine', '临床医学', 'Other'],
|
||||
'urban_and_rural_planner': ['Urban and Rural Planner', '注册城乡规划师', 'Other'],
|
||||
'accountant': ['Accountant', '注册会计师', 'Other'],
|
||||
'fire_engineer': ['Fire Engineer', '注册消防工程师', 'Other'],
|
||||
'environmental_impact_assessment_engineer': ['Environmental Impact Assessment Engineer', '环境影响评价工程师', 'Other'],
|
||||
'tax_accountant': ['Tax Accountant', '税务师', 'Other'],
|
||||
'physician': ['Physician', '医师资格', 'Other'],
|
||||
}
|
||||
ceval_all_sets = list(ceval_subject_mapping.keys())
|
||||
|
||||
|
@ -6,139 +6,58 @@ from opencompass.datasets import CEvalDataset
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
ceval_subject_mapping = {
|
||||
"computer_network":
|
||||
["Computer Network", "\u8ba1\u7b97\u673a\u7f51\u7edc", "STEM"],
|
||||
"operating_system":
|
||||
["Operating System", "\u64cd\u4f5c\u7cfb\u7edf", "STEM"],
|
||||
"computer_architecture":
|
||||
["Computer Architecture", "\u8ba1\u7b97\u673a\u7ec4\u6210", "STEM"],
|
||||
"college_programming":
|
||||
["College Programming", "\u5927\u5b66\u7f16\u7a0b", "STEM"],
|
||||
"college_physics": ["College Physics", "\u5927\u5b66\u7269\u7406", "STEM"],
|
||||
"college_chemistry":
|
||||
["College Chemistry", "\u5927\u5b66\u5316\u5b66", "STEM"],
|
||||
"advanced_mathematics":
|
||||
["Advanced Mathematics", "\u9ad8\u7b49\u6570\u5b66", "STEM"],
|
||||
"probability_and_statistics":
|
||||
["Probability and Statistics", "\u6982\u7387\u7edf\u8ba1", "STEM"],
|
||||
"discrete_mathematics":
|
||||
["Discrete Mathematics", "\u79bb\u6563\u6570\u5b66", "STEM"],
|
||||
"electrical_engineer": [
|
||||
"Electrical Engineer", "\u6ce8\u518c\u7535\u6c14\u5de5\u7a0b\u5e08",
|
||||
"STEM"
|
||||
],
|
||||
"metrology_engineer":
|
||||
["Metrology Engineer", "\u6ce8\u518c\u8ba1\u91cf\u5e08", "STEM"],
|
||||
"high_school_mathematics":
|
||||
["High School Mathematics", "\u9ad8\u4e2d\u6570\u5b66", "STEM"],
|
||||
"high_school_physics":
|
||||
["High School Physics", "\u9ad8\u4e2d\u7269\u7406", "STEM"],
|
||||
"high_school_chemistry":
|
||||
["High School Chemistry", "\u9ad8\u4e2d\u5316\u5b66", "STEM"],
|
||||
"high_school_biology": [
|
||||
"High School Biology", "\u9ad8\u4e2d\u751f\u7269", "STEM"
|
||||
],
|
||||
"middle_school_mathematics": [
|
||||
"Middle School Mathematics", "\u521d\u4e2d\u6570\u5b66", "STEM"
|
||||
],
|
||||
"middle_school_biology": [
|
||||
"Middle School Biology", "\u521d\u4e2d\u751f\u7269", "STEM"
|
||||
],
|
||||
"middle_school_physics": [
|
||||
"Middle School Physics", "\u521d\u4e2d\u7269\u7406", "STEM"
|
||||
],
|
||||
"middle_school_chemistry": [
|
||||
"Middle School Chemistry", "\u521d\u4e2d\u5316\u5b66", "STEM"
|
||||
],
|
||||
"veterinary_medicine": [
|
||||
"Veterinary Medicine", "\u517d\u533b\u5b66", "STEM"
|
||||
],
|
||||
"college_economics": [
|
||||
"College Economics", "\u5927\u5b66\u7ecf\u6d4e\u5b66", "Social Science"
|
||||
],
|
||||
"business_administration": [
|
||||
"Business Administration", "\u5de5\u5546\u7ba1\u7406", "Social Science"
|
||||
],
|
||||
"marxism": [
|
||||
"Marxism", "\u9a6c\u514b\u601d\u4e3b\u4e49\u57fa\u672c\u539f\u7406",
|
||||
"Social Science"
|
||||
],
|
||||
"mao_zedong_thought": [
|
||||
"Mao Zedong Thought",
|
||||
"\u6bdb\u6cfd\u4e1c\u601d\u60f3\u548c\u4e2d\u56fd\u7279\u8272\u793e\u4f1a\u4e3b\u4e49\u7406\u8bba\u4f53\u7cfb\u6982\u8bba",
|
||||
"Social Science"
|
||||
],
|
||||
"education_science": [
|
||||
"Education Science", "\u6559\u80b2\u5b66", "Social Science"
|
||||
],
|
||||
"teacher_qualification": [
|
||||
"Teacher Qualification", "\u6559\u5e08\u8d44\u683c", "Social Science"
|
||||
],
|
||||
"high_school_politics": [
|
||||
"High School Politics", "\u9ad8\u4e2d\u653f\u6cbb", "Social Science"
|
||||
],
|
||||
"high_school_geography": [
|
||||
"High School Geography", "\u9ad8\u4e2d\u5730\u7406", "Social Science"
|
||||
],
|
||||
"middle_school_politics": [
|
||||
"Middle School Politics", "\u521d\u4e2d\u653f\u6cbb", "Social Science"
|
||||
],
|
||||
"middle_school_geography": [
|
||||
"Middle School Geography", "\u521d\u4e2d\u5730\u7406", "Social Science"
|
||||
],
|
||||
"modern_chinese_history":
|
||||
["Modern Chinese History", "\u8fd1\u4ee3\u53f2\u7eb2\u8981", "Humanities"],
|
||||
"ideological_and_moral_cultivation": [
|
||||
"Ideological and Moral Cultivation",
|
||||
"\u601d\u60f3\u9053\u5fb7\u4fee\u517b\u4e0e\u6cd5\u5f8b\u57fa\u7840",
|
||||
"Humanities"
|
||||
],
|
||||
"logic": ["Logic", "\u903b\u8f91\u5b66", "Humanities"],
|
||||
"law": ["Law", "\u6cd5\u5b66", "Humanities"],
|
||||
"chinese_language_and_literature": [
|
||||
"Chinese Language and Literature",
|
||||
"\u4e2d\u56fd\u8bed\u8a00\u6587\u5b66", "Humanities"
|
||||
],
|
||||
"art_studies": ["Art Studies", "\u827a\u672f\u5b66", "Humanities"],
|
||||
"professional_tour_guide": [
|
||||
"Professional Tour Guide", "\u5bfc\u6e38\u8d44\u683c", "Humanities"
|
||||
],
|
||||
"legal_professional": [
|
||||
"Legal Professional", "\u6cd5\u5f8b\u804c\u4e1a\u8d44\u683c",
|
||||
"Humanities"
|
||||
],
|
||||
"high_school_chinese": [
|
||||
"High School Chinese", "\u9ad8\u4e2d\u8bed\u6587", "Humanities"
|
||||
],
|
||||
"high_school_history": [
|
||||
"High School History", "\u9ad8\u4e2d\u5386\u53f2", "Humanities"
|
||||
],
|
||||
"middle_school_history": [
|
||||
"Middle School History", "\u521d\u4e2d\u5386\u53f2", "Humanities"
|
||||
],
|
||||
"civil_servant": ["Civil Servant", "\u516c\u52a1\u5458", "Other"],
|
||||
"sports_science": ["Sports Science", "\u4f53\u80b2\u5b66", "Other"],
|
||||
"plant_protection": [
|
||||
"Plant Protection", "\u690d\u7269\u4fdd\u62a4", "Other"
|
||||
],
|
||||
"basic_medicine": ["Basic Medicine", "\u57fa\u7840\u533b\u5b66", "Other"],
|
||||
"clinical_medicine": [
|
||||
"Clinical Medicine", "\u4e34\u5e8a\u533b\u5b66", "Other"
|
||||
],
|
||||
"urban_and_rural_planner": [
|
||||
"Urban and Rural Planner",
|
||||
"\u6ce8\u518c\u57ce\u4e61\u89c4\u5212\u5e08", "Other"
|
||||
],
|
||||
"accountant": ["Accountant", "\u6ce8\u518c\u4f1a\u8ba1\u5e08", "Other"],
|
||||
"fire_engineer": [
|
||||
"Fire Engineer", "\u6ce8\u518c\u6d88\u9632\u5de5\u7a0b\u5e08", "Other"
|
||||
],
|
||||
"environmental_impact_assessment_engineer": [
|
||||
"Environmental Impact Assessment Engineer",
|
||||
"\u73af\u5883\u5f71\u54cd\u8bc4\u4ef7\u5de5\u7a0b\u5e08", "Other"
|
||||
],
|
||||
"tax_accountant": ["Tax Accountant", "\u7a0e\u52a1\u5e08", "Other"],
|
||||
"physician": ["Physician", "\u533b\u5e08\u8d44\u683c", "Other"]
|
||||
'computer_network': ['Computer Network', '计算机网络', 'STEM'],
|
||||
'operating_system': ['Operating System', '操作系统', 'STEM'],
|
||||
'computer_architecture': ['Computer Architecture', '计算机组成', 'STEM'],
|
||||
'college_programming': ['College Programming', '大学编程', 'STEM'],
|
||||
'college_physics': ['College Physics', '大学物理', 'STEM'],
|
||||
'college_chemistry': ['College Chemistry', '大学化学', 'STEM'],
|
||||
'advanced_mathematics': ['Advanced Mathematics', '高等数学', 'STEM'],
|
||||
'probability_and_statistics': ['Probability and Statistics', '概率统计', 'STEM'],
|
||||
'discrete_mathematics': ['Discrete Mathematics', '离散数学', 'STEM'],
|
||||
'electrical_engineer': ['Electrical Engineer', '注册电气工程师', 'STEM'],
|
||||
'metrology_engineer': ['Metrology Engineer', '注册计量师', 'STEM'],
|
||||
'high_school_mathematics': ['High School Mathematics', '高中数学', 'STEM'],
|
||||
'high_school_physics': ['High School Physics', '高中物理', 'STEM'],
|
||||
'high_school_chemistry': ['High School Chemistry', '高中化学', 'STEM'],
|
||||
'high_school_biology': ['High School Biology', '高中生物', 'STEM'],
|
||||
'middle_school_mathematics': ['Middle School Mathematics', '初中数学', 'STEM'],
|
||||
'middle_school_biology': ['Middle School Biology', '初中生物', 'STEM'],
|
||||
'middle_school_physics': ['Middle School Physics', '初中物理', 'STEM'],
|
||||
'middle_school_chemistry': ['Middle School Chemistry', '初中化学', 'STEM'],
|
||||
'veterinary_medicine': ['Veterinary Medicine', '兽医学', 'STEM'],
|
||||
'college_economics': ['College Economics', '大学经济学', 'Social Science'],
|
||||
'business_administration': ['Business Administration', '工商管理', 'Social Science'],
|
||||
'marxism': ['Marxism', '马克思主义基本原理', 'Social Science'],
|
||||
'mao_zedong_thought': ['Mao Zedong Thought', '毛泽东思想和中国特色社会主义理论体系概论', 'Social Science'],
|
||||
'education_science': ['Education Science', '教育学', 'Social Science'],
|
||||
'teacher_qualification': ['Teacher Qualification', '教师资格', 'Social Science'],
|
||||
'high_school_politics': ['High School Politics', '高中政治', 'Social Science'],
|
||||
'high_school_geography': ['High School Geography', '高中地理', 'Social Science'],
|
||||
'middle_school_politics': ['Middle School Politics', '初中政治', 'Social Science'],
|
||||
'middle_school_geography': ['Middle School Geography', '初中地理', 'Social Science'],
|
||||
'modern_chinese_history': ['Modern Chinese History', '近代史纲要', 'Humanities'],
|
||||
'ideological_and_moral_cultivation': ['Ideological and Moral Cultivation', '思想道德修养与法律基础', 'Humanities'],
|
||||
'logic': ['Logic', '逻辑学', 'Humanities'],
|
||||
'law': ['Law', '法学', 'Humanities'],
|
||||
'chinese_language_and_literature': ['Chinese Language and Literature', '中国语言文学', 'Humanities'],
|
||||
'art_studies': ['Art Studies', '艺术学', 'Humanities'],
|
||||
'professional_tour_guide': ['Professional Tour Guide', '导游资格', 'Humanities'],
|
||||
'legal_professional': ['Legal Professional', '法律职业资格', 'Humanities'],
|
||||
'high_school_chinese': ['High School Chinese', '高中语文', 'Humanities'],
|
||||
'high_school_history': ['High School History', '高中历史', 'Humanities'],
|
||||
'middle_school_history': ['Middle School History', '初中历史', 'Humanities'],
|
||||
'civil_servant': ['Civil Servant', '公务员', 'Other'],
|
||||
'sports_science': ['Sports Science', '体育学', 'Other'],
|
||||
'plant_protection': ['Plant Protection', '植物保护', 'Other'],
|
||||
'basic_medicine': ['Basic Medicine', '基础医学', 'Other'],
|
||||
'clinical_medicine': ['Clinical Medicine', '临床医学', 'Other'],
|
||||
'urban_and_rural_planner': ['Urban and Rural Planner', '注册城乡规划师', 'Other'],
|
||||
'accountant': ['Accountant', '注册会计师', 'Other'],
|
||||
'fire_engineer': ['Fire Engineer', '注册消防工程师', 'Other'],
|
||||
'environmental_impact_assessment_engineer': ['Environmental Impact Assessment Engineer', '环境影响评价工程师', 'Other'],
|
||||
'tax_accountant': ['Tax Accountant', '税务师', 'Other'],
|
||||
'physician': ['Physician', '医师资格', 'Other'],
|
||||
}
|
||||
ceval_all_sets = list(ceval_subject_mapping.keys())
|
||||
|
||||
|
@ -5,139 +5,58 @@ from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import CEvalDataset
|
||||
|
||||
ceval_subject_mapping = {
|
||||
"computer_network":
|
||||
["Computer Network", "\u8ba1\u7b97\u673a\u7f51\u7edc", "STEM"],
|
||||
"operating_system":
|
||||
["Operating System", "\u64cd\u4f5c\u7cfb\u7edf", "STEM"],
|
||||
"computer_architecture":
|
||||
["Computer Architecture", "\u8ba1\u7b97\u673a\u7ec4\u6210", "STEM"],
|
||||
"college_programming":
|
||||
["College Programming", "\u5927\u5b66\u7f16\u7a0b", "STEM"],
|
||||
"college_physics": ["College Physics", "\u5927\u5b66\u7269\u7406", "STEM"],
|
||||
"college_chemistry":
|
||||
["College Chemistry", "\u5927\u5b66\u5316\u5b66", "STEM"],
|
||||
"advanced_mathematics":
|
||||
["Advanced Mathematics", "\u9ad8\u7b49\u6570\u5b66", "STEM"],
|
||||
"probability_and_statistics":
|
||||
["Probability and Statistics", "\u6982\u7387\u7edf\u8ba1", "STEM"],
|
||||
"discrete_mathematics":
|
||||
["Discrete Mathematics", "\u79bb\u6563\u6570\u5b66", "STEM"],
|
||||
"electrical_engineer": [
|
||||
"Electrical Engineer", "\u6ce8\u518c\u7535\u6c14\u5de5\u7a0b\u5e08",
|
||||
"STEM"
|
||||
],
|
||||
"metrology_engineer":
|
||||
["Metrology Engineer", "\u6ce8\u518c\u8ba1\u91cf\u5e08", "STEM"],
|
||||
"high_school_mathematics":
|
||||
["High School Mathematics", "\u9ad8\u4e2d\u6570\u5b66", "STEM"],
|
||||
"high_school_physics":
|
||||
["High School Physics", "\u9ad8\u4e2d\u7269\u7406", "STEM"],
|
||||
"high_school_chemistry":
|
||||
["High School Chemistry", "\u9ad8\u4e2d\u5316\u5b66", "STEM"],
|
||||
"high_school_biology": [
|
||||
"High School Biology", "\u9ad8\u4e2d\u751f\u7269", "STEM"
|
||||
],
|
||||
"middle_school_mathematics": [
|
||||
"Middle School Mathematics", "\u521d\u4e2d\u6570\u5b66", "STEM"
|
||||
],
|
||||
"middle_school_biology": [
|
||||
"Middle School Biology", "\u521d\u4e2d\u751f\u7269", "STEM"
|
||||
],
|
||||
"middle_school_physics": [
|
||||
"Middle School Physics", "\u521d\u4e2d\u7269\u7406", "STEM"
|
||||
],
|
||||
"middle_school_chemistry": [
|
||||
"Middle School Chemistry", "\u521d\u4e2d\u5316\u5b66", "STEM"
|
||||
],
|
||||
"veterinary_medicine": [
|
||||
"Veterinary Medicine", "\u517d\u533b\u5b66", "STEM"
|
||||
],
|
||||
"college_economics": [
|
||||
"College Economics", "\u5927\u5b66\u7ecf\u6d4e\u5b66", "Social Science"
|
||||
],
|
||||
"business_administration": [
|
||||
"Business Administration", "\u5de5\u5546\u7ba1\u7406", "Social Science"
|
||||
],
|
||||
"marxism": [
|
||||
"Marxism", "\u9a6c\u514b\u601d\u4e3b\u4e49\u57fa\u672c\u539f\u7406",
|
||||
"Social Science"
|
||||
],
|
||||
"mao_zedong_thought": [
|
||||
"Mao Zedong Thought",
|
||||
"\u6bdb\u6cfd\u4e1c\u601d\u60f3\u548c\u4e2d\u56fd\u7279\u8272\u793e\u4f1a\u4e3b\u4e49\u7406\u8bba\u4f53\u7cfb\u6982\u8bba",
|
||||
"Social Science"
|
||||
],
|
||||
"education_science": [
|
||||
"Education Science", "\u6559\u80b2\u5b66", "Social Science"
|
||||
],
|
||||
"teacher_qualification": [
|
||||
"Teacher Qualification", "\u6559\u5e08\u8d44\u683c", "Social Science"
|
||||
],
|
||||
"high_school_politics": [
|
||||
"High School Politics", "\u9ad8\u4e2d\u653f\u6cbb", "Social Science"
|
||||
],
|
||||
"high_school_geography": [
|
||||
"High School Geography", "\u9ad8\u4e2d\u5730\u7406", "Social Science"
|
||||
],
|
||||
"middle_school_politics": [
|
||||
"Middle School Politics", "\u521d\u4e2d\u653f\u6cbb", "Social Science"
|
||||
],
|
||||
"middle_school_geography": [
|
||||
"Middle School Geography", "\u521d\u4e2d\u5730\u7406", "Social Science"
|
||||
],
|
||||
"modern_chinese_history":
|
||||
["Modern Chinese History", "\u8fd1\u4ee3\u53f2\u7eb2\u8981", "Humanities"],
|
||||
"ideological_and_moral_cultivation": [
|
||||
"Ideological and Moral Cultivation",
|
||||
"\u601d\u60f3\u9053\u5fb7\u4fee\u517b\u4e0e\u6cd5\u5f8b\u57fa\u7840",
|
||||
"Humanities"
|
||||
],
|
||||
"logic": ["Logic", "\u903b\u8f91\u5b66", "Humanities"],
|
||||
"law": ["Law", "\u6cd5\u5b66", "Humanities"],
|
||||
"chinese_language_and_literature": [
|
||||
"Chinese Language and Literature",
|
||||
"\u4e2d\u56fd\u8bed\u8a00\u6587\u5b66", "Humanities"
|
||||
],
|
||||
"art_studies": ["Art Studies", "\u827a\u672f\u5b66", "Humanities"],
|
||||
"professional_tour_guide": [
|
||||
"Professional Tour Guide", "\u5bfc\u6e38\u8d44\u683c", "Humanities"
|
||||
],
|
||||
"legal_professional": [
|
||||
"Legal Professional", "\u6cd5\u5f8b\u804c\u4e1a\u8d44\u683c",
|
||||
"Humanities"
|
||||
],
|
||||
"high_school_chinese": [
|
||||
"High School Chinese", "\u9ad8\u4e2d\u8bed\u6587", "Humanities"
|
||||
],
|
||||
"high_school_history": [
|
||||
"High School History", "\u9ad8\u4e2d\u5386\u53f2", "Humanities"
|
||||
],
|
||||
"middle_school_history": [
|
||||
"Middle School History", "\u521d\u4e2d\u5386\u53f2", "Humanities"
|
||||
],
|
||||
"civil_servant": ["Civil Servant", "\u516c\u52a1\u5458", "Other"],
|
||||
"sports_science": ["Sports Science", "\u4f53\u80b2\u5b66", "Other"],
|
||||
"plant_protection": [
|
||||
"Plant Protection", "\u690d\u7269\u4fdd\u62a4", "Other"
|
||||
],
|
||||
"basic_medicine": ["Basic Medicine", "\u57fa\u7840\u533b\u5b66", "Other"],
|
||||
"clinical_medicine": [
|
||||
"Clinical Medicine", "\u4e34\u5e8a\u533b\u5b66", "Other"
|
||||
],
|
||||
"urban_and_rural_planner": [
|
||||
"Urban and Rural Planner",
|
||||
"\u6ce8\u518c\u57ce\u4e61\u89c4\u5212\u5e08", "Other"
|
||||
],
|
||||
"accountant": ["Accountant", "\u6ce8\u518c\u4f1a\u8ba1\u5e08", "Other"],
|
||||
"fire_engineer": [
|
||||
"Fire Engineer", "\u6ce8\u518c\u6d88\u9632\u5de5\u7a0b\u5e08", "Other"
|
||||
],
|
||||
"environmental_impact_assessment_engineer": [
|
||||
"Environmental Impact Assessment Engineer",
|
||||
"\u73af\u5883\u5f71\u54cd\u8bc4\u4ef7\u5de5\u7a0b\u5e08", "Other"
|
||||
],
|
||||
"tax_accountant": ["Tax Accountant", "\u7a0e\u52a1\u5e08", "Other"],
|
||||
"physician": ["Physician", "\u533b\u5e08\u8d44\u683c", "Other"]
|
||||
'computer_network': ['Computer Network', '计算机网络', 'STEM'],
|
||||
'operating_system': ['Operating System', '操作系统', 'STEM'],
|
||||
'computer_architecture': ['Computer Architecture', '计算机组成', 'STEM'],
|
||||
'college_programming': ['College Programming', '大学编程', 'STEM'],
|
||||
'college_physics': ['College Physics', '大学物理', 'STEM'],
|
||||
'college_chemistry': ['College Chemistry', '大学化学', 'STEM'],
|
||||
'advanced_mathematics': ['Advanced Mathematics', '高等数学', 'STEM'],
|
||||
'probability_and_statistics': ['Probability and Statistics', '概率统计', 'STEM'],
|
||||
'discrete_mathematics': ['Discrete Mathematics', '离散数学', 'STEM'],
|
||||
'electrical_engineer': ['Electrical Engineer', '注册电气工程师', 'STEM'],
|
||||
'metrology_engineer': ['Metrology Engineer', '注册计量师', 'STEM'],
|
||||
'high_school_mathematics': ['High School Mathematics', '高中数学', 'STEM'],
|
||||
'high_school_physics': ['High School Physics', '高中物理', 'STEM'],
|
||||
'high_school_chemistry': ['High School Chemistry', '高中化学', 'STEM'],
|
||||
'high_school_biology': ['High School Biology', '高中生物', 'STEM'],
|
||||
'middle_school_mathematics': ['Middle School Mathematics', '初中数学', 'STEM'],
|
||||
'middle_school_biology': ['Middle School Biology', '初中生物', 'STEM'],
|
||||
'middle_school_physics': ['Middle School Physics', '初中物理', 'STEM'],
|
||||
'middle_school_chemistry': ['Middle School Chemistry', '初中化学', 'STEM'],
|
||||
'veterinary_medicine': ['Veterinary Medicine', '兽医学', 'STEM'],
|
||||
'college_economics': ['College Economics', '大学经济学', 'Social Science'],
|
||||
'business_administration': ['Business Administration', '工商管理', 'Social Science'],
|
||||
'marxism': ['Marxism', '马克思主义基本原理', 'Social Science'],
|
||||
'mao_zedong_thought': ['Mao Zedong Thought', '毛泽东思想和中国特色社会主义理论体系概论', 'Social Science'],
|
||||
'education_science': ['Education Science', '教育学', 'Social Science'],
|
||||
'teacher_qualification': ['Teacher Qualification', '教师资格', 'Social Science'],
|
||||
'high_school_politics': ['High School Politics', '高中政治', 'Social Science'],
|
||||
'high_school_geography': ['High School Geography', '高中地理', 'Social Science'],
|
||||
'middle_school_politics': ['Middle School Politics', '初中政治', 'Social Science'],
|
||||
'middle_school_geography': ['Middle School Geography', '初中地理', 'Social Science'],
|
||||
'modern_chinese_history': ['Modern Chinese History', '近代史纲要', 'Humanities'],
|
||||
'ideological_and_moral_cultivation': ['Ideological and Moral Cultivation', '思想道德修养与法律基础', 'Humanities'],
|
||||
'logic': ['Logic', '逻辑学', 'Humanities'],
|
||||
'law': ['Law', '法学', 'Humanities'],
|
||||
'chinese_language_and_literature': ['Chinese Language and Literature', '中国语言文学', 'Humanities'],
|
||||
'art_studies': ['Art Studies', '艺术学', 'Humanities'],
|
||||
'professional_tour_guide': ['Professional Tour Guide', '导游资格', 'Humanities'],
|
||||
'legal_professional': ['Legal Professional', '法律职业资格', 'Humanities'],
|
||||
'high_school_chinese': ['High School Chinese', '高中语文', 'Humanities'],
|
||||
'high_school_history': ['High School History', '高中历史', 'Humanities'],
|
||||
'middle_school_history': ['Middle School History', '初中历史', 'Humanities'],
|
||||
'civil_servant': ['Civil Servant', '公务员', 'Other'],
|
||||
'sports_science': ['Sports Science', '体育学', 'Other'],
|
||||
'plant_protection': ['Plant Protection', '植物保护', 'Other'],
|
||||
'basic_medicine': ['Basic Medicine', '基础医学', 'Other'],
|
||||
'clinical_medicine': ['Clinical Medicine', '临床医学', 'Other'],
|
||||
'urban_and_rural_planner': ['Urban and Rural Planner', '注册城乡规划师', 'Other'],
|
||||
'accountant': ['Accountant', '注册会计师', 'Other'],
|
||||
'fire_engineer': ['Fire Engineer', '注册消防工程师', 'Other'],
|
||||
'environmental_impact_assessment_engineer': ['Environmental Impact Assessment Engineer', '环境影响评价工程师', 'Other'],
|
||||
'tax_accountant': ['Tax Accountant', '税务师', 'Other'],
|
||||
'physician': ['Physician', '医师资格', 'Other'],
|
||||
}
|
||||
ceval_all_sets = list(ceval_subject_mapping.keys())
|
||||
|
||||
|
@ -5,139 +5,58 @@ from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import CEvalDataset
|
||||
|
||||
ceval_subject_mapping = {
|
||||
"computer_network":
|
||||
["Computer Network", "\u8ba1\u7b97\u673a\u7f51\u7edc", "STEM"],
|
||||
"operating_system":
|
||||
["Operating System", "\u64cd\u4f5c\u7cfb\u7edf", "STEM"],
|
||||
"computer_architecture":
|
||||
["Computer Architecture", "\u8ba1\u7b97\u673a\u7ec4\u6210", "STEM"],
|
||||
"college_programming":
|
||||
["College Programming", "\u5927\u5b66\u7f16\u7a0b", "STEM"],
|
||||
"college_physics": ["College Physics", "\u5927\u5b66\u7269\u7406", "STEM"],
|
||||
"college_chemistry":
|
||||
["College Chemistry", "\u5927\u5b66\u5316\u5b66", "STEM"],
|
||||
"advanced_mathematics":
|
||||
["Advanced Mathematics", "\u9ad8\u7b49\u6570\u5b66", "STEM"],
|
||||
"probability_and_statistics":
|
||||
["Probability and Statistics", "\u6982\u7387\u7edf\u8ba1", "STEM"],
|
||||
"discrete_mathematics":
|
||||
["Discrete Mathematics", "\u79bb\u6563\u6570\u5b66", "STEM"],
|
||||
"electrical_engineer": [
|
||||
"Electrical Engineer", "\u6ce8\u518c\u7535\u6c14\u5de5\u7a0b\u5e08",
|
||||
"STEM"
|
||||
],
|
||||
"metrology_engineer":
|
||||
["Metrology Engineer", "\u6ce8\u518c\u8ba1\u91cf\u5e08", "STEM"],
|
||||
"high_school_mathematics":
|
||||
["High School Mathematics", "\u9ad8\u4e2d\u6570\u5b66", "STEM"],
|
||||
"high_school_physics":
|
||||
["High School Physics", "\u9ad8\u4e2d\u7269\u7406", "STEM"],
|
||||
"high_school_chemistry":
|
||||
["High School Chemistry", "\u9ad8\u4e2d\u5316\u5b66", "STEM"],
|
||||
"high_school_biology": [
|
||||
"High School Biology", "\u9ad8\u4e2d\u751f\u7269", "STEM"
|
||||
],
|
||||
"middle_school_mathematics": [
|
||||
"Middle School Mathematics", "\u521d\u4e2d\u6570\u5b66", "STEM"
|
||||
],
|
||||
"middle_school_biology": [
|
||||
"Middle School Biology", "\u521d\u4e2d\u751f\u7269", "STEM"
|
||||
],
|
||||
"middle_school_physics": [
|
||||
"Middle School Physics", "\u521d\u4e2d\u7269\u7406", "STEM"
|
||||
],
|
||||
"middle_school_chemistry": [
|
||||
"Middle School Chemistry", "\u521d\u4e2d\u5316\u5b66", "STEM"
|
||||
],
|
||||
"veterinary_medicine": [
|
||||
"Veterinary Medicine", "\u517d\u533b\u5b66", "STEM"
|
||||
],
|
||||
"college_economics": [
|
||||
"College Economics", "\u5927\u5b66\u7ecf\u6d4e\u5b66", "Social Science"
|
||||
],
|
||||
"business_administration": [
|
||||
"Business Administration", "\u5de5\u5546\u7ba1\u7406", "Social Science"
|
||||
],
|
||||
"marxism": [
|
||||
"Marxism", "\u9a6c\u514b\u601d\u4e3b\u4e49\u57fa\u672c\u539f\u7406",
|
||||
"Social Science"
|
||||
],
|
||||
"mao_zedong_thought": [
|
||||
"Mao Zedong Thought",
|
||||
"\u6bdb\u6cfd\u4e1c\u601d\u60f3\u548c\u4e2d\u56fd\u7279\u8272\u793e\u4f1a\u4e3b\u4e49\u7406\u8bba\u4f53\u7cfb\u6982\u8bba",
|
||||
"Social Science"
|
||||
],
|
||||
"education_science": [
|
||||
"Education Science", "\u6559\u80b2\u5b66", "Social Science"
|
||||
],
|
||||
"teacher_qualification": [
|
||||
"Teacher Qualification", "\u6559\u5e08\u8d44\u683c", "Social Science"
|
||||
],
|
||||
"high_school_politics": [
|
||||
"High School Politics", "\u9ad8\u4e2d\u653f\u6cbb", "Social Science"
|
||||
],
|
||||
"high_school_geography": [
|
||||
"High School Geography", "\u9ad8\u4e2d\u5730\u7406", "Social Science"
|
||||
],
|
||||
"middle_school_politics": [
|
||||
"Middle School Politics", "\u521d\u4e2d\u653f\u6cbb", "Social Science"
|
||||
],
|
||||
"middle_school_geography": [
|
||||
"Middle School Geography", "\u521d\u4e2d\u5730\u7406", "Social Science"
|
||||
],
|
||||
"modern_chinese_history":
|
||||
["Modern Chinese History", "\u8fd1\u4ee3\u53f2\u7eb2\u8981", "Humanities"],
|
||||
"ideological_and_moral_cultivation": [
|
||||
"Ideological and Moral Cultivation",
|
||||
"\u601d\u60f3\u9053\u5fb7\u4fee\u517b\u4e0e\u6cd5\u5f8b\u57fa\u7840",
|
||||
"Humanities"
|
||||
],
|
||||
"logic": ["Logic", "\u903b\u8f91\u5b66", "Humanities"],
|
||||
"law": ["Law", "\u6cd5\u5b66", "Humanities"],
|
||||
"chinese_language_and_literature": [
|
||||
"Chinese Language and Literature",
|
||||
"\u4e2d\u56fd\u8bed\u8a00\u6587\u5b66", "Humanities"
|
||||
],
|
||||
"art_studies": ["Art Studies", "\u827a\u672f\u5b66", "Humanities"],
|
||||
"professional_tour_guide": [
|
||||
"Professional Tour Guide", "\u5bfc\u6e38\u8d44\u683c", "Humanities"
|
||||
],
|
||||
"legal_professional": [
|
||||
"Legal Professional", "\u6cd5\u5f8b\u804c\u4e1a\u8d44\u683c",
|
||||
"Humanities"
|
||||
],
|
||||
"high_school_chinese": [
|
||||
"High School Chinese", "\u9ad8\u4e2d\u8bed\u6587", "Humanities"
|
||||
],
|
||||
"high_school_history": [
|
||||
"High School History", "\u9ad8\u4e2d\u5386\u53f2", "Humanities"
|
||||
],
|
||||
"middle_school_history": [
|
||||
"Middle School History", "\u521d\u4e2d\u5386\u53f2", "Humanities"
|
||||
],
|
||||
"civil_servant": ["Civil Servant", "\u516c\u52a1\u5458", "Other"],
|
||||
"sports_science": ["Sports Science", "\u4f53\u80b2\u5b66", "Other"],
|
||||
"plant_protection": [
|
||||
"Plant Protection", "\u690d\u7269\u4fdd\u62a4", "Other"
|
||||
],
|
||||
"basic_medicine": ["Basic Medicine", "\u57fa\u7840\u533b\u5b66", "Other"],
|
||||
"clinical_medicine": [
|
||||
"Clinical Medicine", "\u4e34\u5e8a\u533b\u5b66", "Other"
|
||||
],
|
||||
"urban_and_rural_planner": [
|
||||
"Urban and Rural Planner",
|
||||
"\u6ce8\u518c\u57ce\u4e61\u89c4\u5212\u5e08", "Other"
|
||||
],
|
||||
"accountant": ["Accountant", "\u6ce8\u518c\u4f1a\u8ba1\u5e08", "Other"],
|
||||
"fire_engineer": [
|
||||
"Fire Engineer", "\u6ce8\u518c\u6d88\u9632\u5de5\u7a0b\u5e08", "Other"
|
||||
],
|
||||
"environmental_impact_assessment_engineer": [
|
||||
"Environmental Impact Assessment Engineer",
|
||||
"\u73af\u5883\u5f71\u54cd\u8bc4\u4ef7\u5de5\u7a0b\u5e08", "Other"
|
||||
],
|
||||
"tax_accountant": ["Tax Accountant", "\u7a0e\u52a1\u5e08", "Other"],
|
||||
"physician": ["Physician", "\u533b\u5e08\u8d44\u683c", "Other"]
|
||||
'computer_network': ['Computer Network', '计算机网络', 'STEM'],
|
||||
'operating_system': ['Operating System', '操作系统', 'STEM'],
|
||||
'computer_architecture': ['Computer Architecture', '计算机组成', 'STEM'],
|
||||
'college_programming': ['College Programming', '大学编程', 'STEM'],
|
||||
'college_physics': ['College Physics', '大学物理', 'STEM'],
|
||||
'college_chemistry': ['College Chemistry', '大学化学', 'STEM'],
|
||||
'advanced_mathematics': ['Advanced Mathematics', '高等数学', 'STEM'],
|
||||
'probability_and_statistics': ['Probability and Statistics', '概率统计', 'STEM'],
|
||||
'discrete_mathematics': ['Discrete Mathematics', '离散数学', 'STEM'],
|
||||
'electrical_engineer': ['Electrical Engineer', '注册电气工程师', 'STEM'],
|
||||
'metrology_engineer': ['Metrology Engineer', '注册计量师', 'STEM'],
|
||||
'high_school_mathematics': ['High School Mathematics', '高中数学', 'STEM'],
|
||||
'high_school_physics': ['High School Physics', '高中物理', 'STEM'],
|
||||
'high_school_chemistry': ['High School Chemistry', '高中化学', 'STEM'],
|
||||
'high_school_biology': ['High School Biology', '高中生物', 'STEM'],
|
||||
'middle_school_mathematics': ['Middle School Mathematics', '初中数学', 'STEM'],
|
||||
'middle_school_biology': ['Middle School Biology', '初中生物', 'STEM'],
|
||||
'middle_school_physics': ['Middle School Physics', '初中物理', 'STEM'],
|
||||
'middle_school_chemistry': ['Middle School Chemistry', '初中化学', 'STEM'],
|
||||
'veterinary_medicine': ['Veterinary Medicine', '兽医学', 'STEM'],
|
||||
'college_economics': ['College Economics', '大学经济学', 'Social Science'],
|
||||
'business_administration': ['Business Administration', '工商管理', 'Social Science'],
|
||||
'marxism': ['Marxism', '马克思主义基本原理', 'Social Science'],
|
||||
'mao_zedong_thought': ['Mao Zedong Thought', '毛泽东思想和中国特色社会主义理论体系概论', 'Social Science'],
|
||||
'education_science': ['Education Science', '教育学', 'Social Science'],
|
||||
'teacher_qualification': ['Teacher Qualification', '教师资格', 'Social Science'],
|
||||
'high_school_politics': ['High School Politics', '高中政治', 'Social Science'],
|
||||
'high_school_geography': ['High School Geography', '高中地理', 'Social Science'],
|
||||
'middle_school_politics': ['Middle School Politics', '初中政治', 'Social Science'],
|
||||
'middle_school_geography': ['Middle School Geography', '初中地理', 'Social Science'],
|
||||
'modern_chinese_history': ['Modern Chinese History', '近代史纲要', 'Humanities'],
|
||||
'ideological_and_moral_cultivation': ['Ideological and Moral Cultivation', '思想道德修养与法律基础', 'Humanities'],
|
||||
'logic': ['Logic', '逻辑学', 'Humanities'],
|
||||
'law': ['Law', '法学', 'Humanities'],
|
||||
'chinese_language_and_literature': ['Chinese Language and Literature', '中国语言文学', 'Humanities'],
|
||||
'art_studies': ['Art Studies', '艺术学', 'Humanities'],
|
||||
'professional_tour_guide': ['Professional Tour Guide', '导游资格', 'Humanities'],
|
||||
'legal_professional': ['Legal Professional', '法律职业资格', 'Humanities'],
|
||||
'high_school_chinese': ['High School Chinese', '高中语文', 'Humanities'],
|
||||
'high_school_history': ['High School History', '高中历史', 'Humanities'],
|
||||
'middle_school_history': ['Middle School History', '初中历史', 'Humanities'],
|
||||
'civil_servant': ['Civil Servant', '公务员', 'Other'],
|
||||
'sports_science': ['Sports Science', '体育学', 'Other'],
|
||||
'plant_protection': ['Plant Protection', '植物保护', 'Other'],
|
||||
'basic_medicine': ['Basic Medicine', '基础医学', 'Other'],
|
||||
'clinical_medicine': ['Clinical Medicine', '临床医学', 'Other'],
|
||||
'urban_and_rural_planner': ['Urban and Rural Planner', '注册城乡规划师', 'Other'],
|
||||
'accountant': ['Accountant', '注册会计师', 'Other'],
|
||||
'fire_engineer': ['Fire Engineer', '注册消防工程师', 'Other'],
|
||||
'environmental_impact_assessment_engineer': ['Environmental Impact Assessment Engineer', '环境影响评价工程师', 'Other'],
|
||||
'tax_accountant': ['Tax Accountant', '税务师', 'Other'],
|
||||
'physician': ['Physician', '医师资格', 'Other'],
|
||||
}
|
||||
ceval_all_sets = list(ceval_subject_mapping.keys())
|
||||
|
||||
|
105
configs/datasets/ceval/ceval_zero_shot_gen_bd40ef.py
Normal file
105
configs/datasets/ceval/ceval_zero_shot_gen_bd40ef.py
Normal file
@ -0,0 +1,105 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import FixKRetriever, ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import CEvalDataset
|
||||
from opencompass.utils.text_postprocessors import first_option_postprocess
|
||||
|
||||
# C-Eval subjects: subject key -> [English name, Chinese name, category].
# The key order is significant: it fixes the order of `ceval_all_sets` and
# therefore of the generated `ceval_datasets`.
ceval_subject_mapping = {
    # --- STEM ---
    "computer_network": ["Computer Network", "计算机网络", "STEM"],
    "operating_system": ["Operating System", "操作系统", "STEM"],
    "computer_architecture": ["Computer Architecture", "计算机组成", "STEM"],
    "college_programming": ["College Programming", "大学编程", "STEM"],
    "college_physics": ["College Physics", "大学物理", "STEM"],
    "college_chemistry": ["College Chemistry", "大学化学", "STEM"],
    "advanced_mathematics": ["Advanced Mathematics", "高等数学", "STEM"],
    "probability_and_statistics": [
        "Probability and Statistics", "概率统计", "STEM",
    ],
    "discrete_mathematics": ["Discrete Mathematics", "离散数学", "STEM"],
    "electrical_engineer": ["Electrical Engineer", "注册电气工程师", "STEM"],
    "metrology_engineer": ["Metrology Engineer", "注册计量师", "STEM"],
    "high_school_mathematics": ["High School Mathematics", "高中数学", "STEM"],
    "high_school_physics": ["High School Physics", "高中物理", "STEM"],
    "high_school_chemistry": ["High School Chemistry", "高中化学", "STEM"],
    "high_school_biology": ["High School Biology", "高中生物", "STEM"],
    "middle_school_mathematics": [
        "Middle School Mathematics", "初中数学", "STEM",
    ],
    "middle_school_biology": ["Middle School Biology", "初中生物", "STEM"],
    "middle_school_physics": ["Middle School Physics", "初中物理", "STEM"],
    "middle_school_chemistry": ["Middle School Chemistry", "初中化学", "STEM"],
    "veterinary_medicine": ["Veterinary Medicine", "兽医学", "STEM"],
    # --- Social Science ---
    "college_economics": ["College Economics", "大学经济学", "Social Science"],
    "business_administration": [
        "Business Administration", "工商管理", "Social Science",
    ],
    "marxism": ["Marxism", "马克思主义基本原理", "Social Science"],
    "mao_zedong_thought": [
        "Mao Zedong Thought",
        "毛泽东思想和中国特色社会主义理论体系概论",
        "Social Science",
    ],
    "education_science": ["Education Science", "教育学", "Social Science"],
    "teacher_qualification": [
        "Teacher Qualification", "教师资格", "Social Science",
    ],
    "high_school_politics": [
        "High School Politics", "高中政治", "Social Science",
    ],
    "high_school_geography": [
        "High School Geography", "高中地理", "Social Science",
    ],
    "middle_school_politics": [
        "Middle School Politics", "初中政治", "Social Science",
    ],
    "middle_school_geography": [
        "Middle School Geography", "初中地理", "Social Science",
    ],
    # --- Humanities ---
    "modern_chinese_history": [
        "Modern Chinese History", "近代史纲要", "Humanities",
    ],
    "ideological_and_moral_cultivation": [
        "Ideological and Moral Cultivation",
        "思想道德修养与法律基础",
        "Humanities",
    ],
    "logic": ["Logic", "逻辑学", "Humanities"],
    "law": ["Law", "法学", "Humanities"],
    "chinese_language_and_literature": [
        "Chinese Language and Literature", "中国语言文学", "Humanities",
    ],
    "art_studies": ["Art Studies", "艺术学", "Humanities"],
    "professional_tour_guide": [
        "Professional Tour Guide", "导游资格", "Humanities",
    ],
    "legal_professional": ["Legal Professional", "法律职业资格", "Humanities"],
    "high_school_chinese": ["High School Chinese", "高中语文", "Humanities"],
    "high_school_history": ["High School History", "高中历史", "Humanities"],
    "middle_school_history": [
        "Middle School History", "初中历史", "Humanities",
    ],
    # --- Other ---
    "civil_servant": ["Civil Servant", "公务员", "Other"],
    "sports_science": ["Sports Science", "体育学", "Other"],
    "plant_protection": ["Plant Protection", "植物保护", "Other"],
    "basic_medicine": ["Basic Medicine", "基础医学", "Other"],
    "clinical_medicine": ["Clinical Medicine", "临床医学", "Other"],
    "urban_and_rural_planner": [
        "Urban and Rural Planner", "注册城乡规划师", "Other",
    ],
    "accountant": ["Accountant", "注册会计师", "Other"],
    "fire_engineer": ["Fire Engineer", "注册消防工程师", "Other"],
    "environmental_impact_assessment_engineer": [
        "Environmental Impact Assessment Engineer",
        "环境影响评价工程师",
        "Other",
    ],
    "tax_accountant": ["Tax Accountant", "税务师", "Other"],
    "physician": ["Physician", "医师资格", "Other"],
}
# Every subject key, in declaration order.
ceval_all_sets = list(ceval_subject_mapping)

# One dataset config per (split, subject); only the "val" split is listed.
ceval_datasets = []
for _split in ("val",):
    for _name in ceval_all_sets:
        # Chinese subject name, interpolated into the prompt below.
        _ch_name = ceval_subject_mapping[_name][1]

        # The HUMAN turn (in Chinese) presents a single-choice exam question
        # on the subject with options A-D and asks to think step by step;
        # the BOT turn is the `answer` column.
        ceval_infer_cfg = {
            "ice_template": {
                "type": PromptTemplate,
                "template": {
                    "begin": "</E>",
                    "round": [
                        {
                            "role": "HUMAN",
                            # Only the first fragment is an f-string; the
                            # rest keep literal {placeholders} for the
                            # template engine.
                            "prompt": (
                                f"以下是中国关于{_ch_name}考试的单项选择题,请选出其中的正确答案。\n"
                                "{question}\n"
                                "A. {A}\nB. {B}\nC. {C}\nD. {D}\n"
                                "让我们一步一步思考。答案: "
                            ),
                        },
                        {"role": "BOT", "prompt": "{answer}"},
                    ],
                },
                "ice_token": "</E>",
            },
            "retriever": {"type": ZeroRetriever},
            "inferencer": {"type": GenInferencer, "max_out_len": 256},
        }

        # Accuracy over the option letter extracted from the generation.
        ceval_eval_cfg = {
            "evaluator": {"type": AccEvaluator},
            "pred_postprocessor": {
                "type": first_option_postprocess,
                "options": "ABCD",
            },
        }

        ceval_datasets.append({
            "type": CEvalDataset,
            "path": "./data/ceval/formal_ceval",
            "name": _name,
            # "val" keeps the plain abbreviation; any other split would be
            # tagged with "test".
            "abbr": (f"ceval-{_name}"
                     if _split == "val" else f"ceval-test-{_name}"),
            "reader_cfg": {
                "input_columns": ["question", "A", "B", "C", "D"],
                "output_column": "answer",
                "train_split": "dev",
                "test_split": _split,
            },
            "infer_cfg": ceval_infer_cfg,
            "eval_cfg": ceval_eval_cfg,
        })
|
41
configs/datasets/commonsenseqa/commonsenseqa_ppl_c49e77.py
Normal file
41
configs/datasets/commonsenseqa/commonsenseqa_ppl_c49e77.py
Normal file
@ -0,0 +1,41 @@
|
||||
# Use FixKRetriever to avoid the hang caused by Hugging Face.
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import FixKRetriever
|
||||
from opencompass.openicl.icl_inferencer import PPLInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import commonsenseqaDataset
|
||||
|
||||
# Columns read from each CommonsenseQA row; evaluation uses the
# "validation" split.
commonsenseqa_reader_cfg = {
    "input_columns": ["question", "A", "B", "C", "D", "E"],
    "output_column": "answerKey",
    "test_split": "validation",
}

# One candidate template per answer letter: the HUMAN turn is identical for
# all of them and the BOT turn is the letter itself.
_ice_template = {
    "type": PromptTemplate,
    "template": {
        ans: {
            "begin": "</E>",
            "round": [
                {
                    "role": "HUMAN",
                    "prompt": (
                        "Question: {question}\n"
                        "A. {A}\nB. {B}\nC. {C}\nD. {D}\nE. {E}\n"
                        "Answer: "
                    ),
                },
                {"role": "BOT", "prompt": ans},
            ],
        }
        for ans in "ABCDE"
    },
    "ice_token": "</E>",
}

# In-context examples are the fixed ids 0..7, selected by FixKRetriever.
commonsenseqa_infer_cfg = {
    "ice_template": _ice_template,
    "retriever": {"type": FixKRetriever, "fix_id_list": list(range(8))},
    "inferencer": {"type": PPLInferencer},
}

commonsenseqa_eval_cfg = {"evaluator": {"type": AccEvaluator}}

commonsenseqa_datasets = [
    {
        "abbr": "commonsense_qa",
        "type": commonsenseqaDataset,
        "path": "./data/commonsenseqa",
        "reader_cfg": commonsenseqa_reader_cfg,
        "infer_cfg": commonsenseqa_infer_cfg,
        "eval_cfg": commonsenseqa_eval_cfg,
    },
]
|
@ -37,7 +37,7 @@ ds1000_datasets = [
|
||||
dict(
|
||||
abbr=f"ds1000_{lib}",
|
||||
type=DS1000Dataset,
|
||||
path="ds1000_data/",
|
||||
path="./data/ds1000_data/",
|
||||
libs=f"{lib}",
|
||||
reader_cfg=ds1000_reader_cfg,
|
||||
infer_cfg=ds1000_infer_cfg,
|
||||
@ -55,7 +55,7 @@ ds1000_datasets.append(
|
||||
dict(
|
||||
abbr="ds1000_Matplotlib",
|
||||
type=DS1000Dataset,
|
||||
path="ds1000_data/",
|
||||
path="./data/ds1000_data/",
|
||||
libs="Matplotlib",
|
||||
reader_cfg=ds1000_reader_cfg,
|
||||
infer_cfg=ds1000_infer_cfg,
|
||||
|
67
configs/datasets/ds1000/ds1000_service_eval_gen_cbc84f.py
Normal file
67
configs/datasets/ds1000/ds1000_service_eval_gen_cbc84f.py
Normal file
@ -0,0 +1,67 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import DS1000Dataset, DS1000ServiceEvaluator
|
||||
|
||||
# Reader config: the "test" split serves as both train and test split.
ds1000_reader_cfg = {
    "input_columns": ["prompt"],
    "output_column": "test_column",
    "train_split": "test",
    "test_split": "test",
}

# The dataset's `prompt` column is passed through as a single HUMAN turn.
ds1000_infer_cfg = {
    "prompt_template": {
        "type": PromptTemplate,
        "template": {
            "round": [
                {"role": "HUMAN", "prompt": "{prompt}"},
            ],
        },
    },
    "retriever": {"type": ZeroRetriever},
    "inferencer": {"type": GenInferencer},
}

# Per-library evaluation config, keyed by library name.
ds1000_eval_cfg_dict = {
    lib: {
        "evaluator": {
            "type": DS1000ServiceEvaluator,
            "lib": lib,
            # Replace with the IP address and port of your code_eval_server.
            "ip_address": "localhost",
            "port": 5000,
        },
        "pred_role": "BOT",
    }
    for lib in (
        "Pandas",
        "Numpy",
        "Tensorflow",
        "Scipy",
        "Sklearn",
        "Pytorch",
        "Matplotlib",
    )
}

# The DS-1000 dataset can be downloaded from
# https://github.com/HKUNLP/DS-1000/blob/main/ds1000_data.zip
# One dataset entry per library, in the same order as the eval configs above.
ds1000_datasets = [
    {
        "abbr": "ds1000_" + lib,
        "type": DS1000Dataset,
        "path": "./data/ds1000_data/",
        "libs": lib,
        "reader_cfg": ds1000_reader_cfg,
        "infer_cfg": ds1000_infer_cfg,
        "eval_cfg": lib_eval_cfg,
    }
    for lib, lib_eval_cfg in ds1000_eval_cfg_dict.items()
]
|
33
configs/datasets/hellaswag/hellaswag_ppl_7d7f2d.py
Normal file
33
configs/datasets/hellaswag/hellaswag_ppl_7d7f2d.py
Normal file
@ -0,0 +1,33 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import PPLInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import hellaswagDataset_V2
|
||||
|
||||
# Columns read from each HellaSwag row.
# NOTE(review): the prompt below formats `{ctx}`, while `input_columns`
# lists 'query' -- confirm which column name the dataset loader emits.
hellaswag_reader_cfg = {
    "input_columns": ["query", "A", "B", "C", "D"],
    "output_column": "label",
}

# One candidate template per answer letter: the HUMAN turn is identical for
# all of them and the BOT turn is the letter itself.
hellaswag_infer_cfg = {
    "prompt_template": {
        "type": PromptTemplate,
        "template": {
            ans: {
                "round": [
                    {
                        "role": "HUMAN",
                        "prompt": (
                            "{ctx}\n"
                            "Question: Which ending makes the most sense?\n"
                            "A. {A}\nB. {B}\nC. {C}\nD. {D}\n"
                            "Answer: "
                        ),
                    },
                    {"role": "BOT", "prompt": ans},
                ],
            }
            for ans in "ABCD"
        },
    },
    "retriever": {"type": ZeroRetriever},
    "inferencer": {"type": PPLInferencer},
}

hellaswag_eval_cfg = {"evaluator": {"type": AccEvaluator}}

hellaswag_datasets = [
    {
        "abbr": "hellaswag",
        "type": hellaswagDataset_V2,
        "path": "./data/hellaswag/hellaswag.jsonl",
        "reader_cfg": hellaswag_reader_cfg,
        "infer_cfg": hellaswag_infer_cfg,
        "eval_cfg": hellaswag_eval_cfg,
    },
]
|
123
configs/datasets/mmlu/mmlu_zero_shot_gen_47e2c0.py
Normal file
123
configs/datasets/mmlu/mmlu_zero_shot_gen_47e2c0.py
Normal file
@ -0,0 +1,123 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import FixKRetriever, ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import MMLUDataset
|
||||
from opencompass.utils.text_postprocessors import first_option_postprocess
|
||||
|
||||
# None of the MMLU datasets on Hugging Face are parsed correctly, so we use our own dataset reader.
|
||||
# Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar
|
||||
|
||||
# Reader config shared by every MMLU subject.
mmlu_reader_cfg = {
    "input_columns": ["input", "A", "B", "C", "D"],
    "output_column": "target",
    "train_split": "dev",
}

# Subject names; the order fixes the order of `mmlu_datasets`.
mmlu_all_sets = [
    "college_biology",
    "college_chemistry",
    "college_computer_science",
    "college_mathematics",
    "college_physics",
    "electrical_engineering",
    "astronomy",
    "anatomy",
    "abstract_algebra",
    "machine_learning",
    "clinical_knowledge",
    "global_facts",
    "management",
    "nutrition",
    "marketing",
    "professional_accounting",
    "high_school_geography",
    "international_law",
    "moral_scenarios",
    "computer_security",
    "high_school_microeconomics",
    "professional_law",
    "medical_genetics",
    "professional_psychology",
    "jurisprudence",
    "world_religions",
    "philosophy",
    "virology",
    "high_school_chemistry",
    "public_relations",
    "high_school_macroeconomics",
    "human_sexuality",
    "elementary_mathematics",
    "high_school_physics",
    "high_school_computer_science",
    "high_school_european_history",
    "business_ethics",
    "moral_disputes",
    "high_school_statistics",
    "miscellaneous",
    "formal_logic",
    "high_school_government_and_politics",
    "prehistory",
    "security_studies",
    "high_school_biology",
    "logical_fallacies",
    "high_school_world_history",
    "professional_medicine",
    "high_school_mathematics",
    "college_medicine",
    "high_school_us_history",
    "sociology",
    "econometrics",
    "high_school_psychology",
    "human_aging",
    "us_foreign_policy",
    "conceptual_physics",
]

# One dataset config per subject.
mmlu_datasets = []
for _name in mmlu_all_sets:
    # Instruction line; underscores in the subject name become spaces.
    _hint = ("There is a single choice question about "
             + _name.replace("_", " ")
             + ". Answer the question by replying A, B, C or D.")

    mmlu_infer_cfg = {
        # Template for an in-context example: question plus its `target`.
        "ice_template": {
            "type": PromptTemplate,
            "template": {
                "round": [
                    {
                        "role": "HUMAN",
                        # Only the first fragment is an f-string; the rest
                        # keeps literal {placeholders} for the template
                        # engine.
                        "prompt": (
                            f"{_hint}\n"
                            "Q: {input}\n"
                            "A. {A}\nB. {B}\nC. {C}\nD. {D}\n"
                            "A: "
                        ),
                    },
                    {"role": "BOT", "prompt": "{target}\n"},
                ],
            },
        },
        # Template for the actual query: same question layout, with a
        # "think step by step" cue before the answer marker.
        "prompt_template": {
            "type": PromptTemplate,
            "template": {
                "begin": "</E>",
                "round": [
                    {
                        "role": "HUMAN",
                        "prompt": (
                            f"{_hint}\n"
                            "Q: {input}\n"
                            "A. {A}\nB. {B}\nC. {C}\nD. {D}\n"
                            "Let's think step by step. A: "
                        ),
                    },
                ],
            },
            "ice_token": "</E>",
        },
        "retriever": {"type": ZeroRetriever},
        "inferencer": {"type": GenInferencer, "max_out_len": 256},
    }

    # Accuracy over the option letter extracted from the generation.
    mmlu_eval_cfg = {
        "evaluator": {"type": AccEvaluator},
        "pred_postprocessor": {
            "type": first_option_postprocess,
            "options": "ABCD",
        },
    }

    mmlu_datasets.append({
        "abbr": "lukaemon_mmlu_" + _name,
        "type": MMLUDataset,
        "path": "./data/mmlu/",
        "name": _name,
        "reader_cfg": mmlu_reader_cfg,
        "infer_cfg": mmlu_infer_cfg,
        "eval_cfg": mmlu_eval_cfg,
    })
|
24
configs/models/bluelm/hf_bluelm_7b_base.py
Normal file
24
configs/models/bluelm/hf_bluelm_7b_base.py
Normal file
@ -0,0 +1,24 @@
|
||||
from opencompass.models import HuggingFaceCausalLM
|
||||
|
||||
# BlueLM-7B-Base loaded through the Hugging Face causal-LM wrapper.
models = [
    {
        "type": HuggingFaceCausalLM,
        "abbr": "bluelm-7b-base-hf",
        # Model and tokenizer come from the same repository id.
        "path": "vivo-ai/BlueLM-7B-Base",
        "tokenizer_path": "vivo-ai/BlueLM-7B-Base",
        "model_kwargs": {
            "device_map": "auto",
            "trust_remote_code": True,
        },
        "tokenizer_kwargs": {
            "padding_side": "left",
            "truncation_side": "left",
            "trust_remote_code": True,
            "use_fast": False,
        },
        "max_out_len": 100,
        "max_seq_len": 2048,
        "batch_size": 8,
        "run_cfg": {"num_gpus": 1, "num_procs": 1},
    },
]
|
24
configs/models/bluelm/hf_bluelm_7b_base_32k.py
Normal file
24
configs/models/bluelm/hf_bluelm_7b_base_32k.py
Normal file
@ -0,0 +1,24 @@
|
||||
from opencompass.models import HuggingFaceCausalLM
|
||||
|
||||
# BlueLM-7B-Base-32K loaded through the Hugging Face causal-LM wrapper.
models = [
    {
        "type": HuggingFaceCausalLM,
        "abbr": "bluelm-7b-base-32k-hf",
        # Model and tokenizer come from the same repository id.
        "path": "vivo-ai/BlueLM-7B-Base-32K",
        "tokenizer_path": "vivo-ai/BlueLM-7B-Base-32K",
        "model_kwargs": {
            "device_map": "auto",
            "trust_remote_code": True,
        },
        "tokenizer_kwargs": {
            "padding_side": "left",
            "truncation_side": "left",
            "trust_remote_code": True,
            "use_fast": False,
        },
        "max_out_len": 100,
        # NOTE(review): the checkpoint name says 32K but the sequence limit
        # here is 4096 -- confirm this is intentional.
        "max_seq_len": 4096,
        "batch_size": 8,
        "run_cfg": {"num_gpus": 1, "num_procs": 1},
    },
]
|
32
configs/models/bluelm/hf_bluelm_7b_chat.py
Normal file
32
configs/models/bluelm/hf_bluelm_7b_chat.py
Normal file
@ -0,0 +1,32 @@
|
||||
from opencompass.models import HuggingFaceCausalLM
|
||||
|
||||
# Chat markup: HUMAN turns start with "[|Human|]:", BOT turns start with
# "[|AI|]:"; the BOT turn is the one flagged with generate=True.
_meta_template = {
    "round": [
        {"role": "HUMAN", "begin": "[|Human|]:"},
        {"role": "BOT", "begin": "[|AI|]:", "generate": True},
    ],
}

# BlueLM-7B-Chat loaded through the Hugging Face causal-LM wrapper.
models = [
    {
        "type": HuggingFaceCausalLM,
        "abbr": "bluelm-7b-chat-hf",
        # Model and tokenizer come from the same repository id.
        "path": "vivo-ai/BlueLM-7B-Chat",
        "tokenizer_path": "vivo-ai/BlueLM-7B-Chat",
        "model_kwargs": {
            "device_map": "auto",
            "trust_remote_code": True,
        },
        "tokenizer_kwargs": {
            "padding_side": "left",
            "truncation_side": "left",
            "trust_remote_code": True,
            "use_fast": False,
        },
        "meta_template": _meta_template,
        "max_out_len": 100,
        "max_seq_len": 2048,
        "batch_size": 8,
        "run_cfg": {"num_gpus": 1, "num_procs": 1},
    },
]
|
32
configs/models/bluelm/hf_bluelm_7b_chat_32k.py
Normal file
32
configs/models/bluelm/hf_bluelm_7b_chat_32k.py
Normal file
@ -0,0 +1,32 @@
|
||||
from opencompass.models import HuggingFaceCausalLM
|
||||
|
||||
# Chat markup: HUMAN turns start with "[|Human|]:", BOT turns start with
# "[|AI|]:"; the BOT turn is the one flagged with generate=True.
_meta_template = {
    "round": [
        {"role": "HUMAN", "begin": "[|Human|]:"},
        {"role": "BOT", "begin": "[|AI|]:", "generate": True},
    ],
}

# BlueLM-7B-Chat-32K loaded through the Hugging Face causal-LM wrapper.
models = [
    {
        "type": HuggingFaceCausalLM,
        "abbr": "bluelm-7b-chat-32k-hf",
        # Model and tokenizer come from the same repository id.
        "path": "vivo-ai/BlueLM-7B-Chat-32K",
        "tokenizer_path": "vivo-ai/BlueLM-7B-Chat-32K",
        "model_kwargs": {
            "device_map": "auto",
            "trust_remote_code": True,
        },
        "tokenizer_kwargs": {
            "padding_side": "left",
            "truncation_side": "left",
            "trust_remote_code": True,
            "use_fast": False,
        },
        "meta_template": _meta_template,
        "max_out_len": 100,
        # NOTE(review): the checkpoint name says 32K but the sequence limit
        # here is 4096 -- confirm this is intentional.
        "max_seq_len": 4096,
        "batch_size": 8,
        "run_cfg": {"num_gpus": 1, "num_procs": 1},
    },
]
|
33
configs/models/nanbeige/hf_nanbeige_16b_base.py
Normal file
33
configs/models/nanbeige/hf_nanbeige_16b_base.py
Normal file
@ -0,0 +1,33 @@
|
||||
from opencompass.models import HuggingFaceCausalLM
|
||||
|
||||
# Turn markup for the base model: no wrapping around the HUMAN turn, and a
# blank line ("\n\n") closing the BOT turn, which is flagged generate=True.
_meta_template = {
    "round": [
        {"role": "HUMAN", "begin": "", "end": ""},
        {"role": "BOT", "begin": "", "end": "\n\n", "generate": True},
    ],
}

# Nanbeige-16B-Base loaded through the Hugging Face causal-LM wrapper.
models = [
    {
        "abbr": "nanbeige-16b-base-hf",
        "type": HuggingFaceCausalLM,
        # Model and tokenizer come from the same repository id.
        "path": "Nanbeige/Nanbeige-16B-Base",
        "tokenizer_path": "Nanbeige/Nanbeige-16B-Base",
        "model_kwargs": {
            "device_map": "auto",
            "trust_remote_code": True,
            "torch_dtype": "auto",
        },
        # NOTE(review): no `use_fast` is passed here and batch_size is 1,
        # whereas the sibling Nanbeige configs in this change set
        # use_fast=False and batch_size=8 -- confirm the difference is
        # deliberate.
        "tokenizer_kwargs": {
            "padding_side": "right",
            "truncation_side": "left",
            "trust_remote_code": True,
        },
        "meta_template": _meta_template,
        "batch_padding": False,
        "max_out_len": 1024,
        "max_seq_len": 4096,
        "batch_size": 1,
        "run_cfg": {"num_gpus": 1, "num_procs": 1},
    },
]
|
34
configs/models/nanbeige/hf_nanbeige_16b_base_32k.py
Normal file
34
configs/models/nanbeige/hf_nanbeige_16b_base_32k.py
Normal file
@ -0,0 +1,34 @@
|
||||
from opencompass.models import HuggingFaceCausalLM
|
||||
|
||||
# Turn markup for the base model: no wrapping around the HUMAN turn, and a
# blank line ("\n\n") closing the BOT turn, which is flagged generate=True.
_meta_template = {
    "round": [
        {"role": "HUMAN", "begin": "", "end": ""},
        {"role": "BOT", "begin": "", "end": "\n\n", "generate": True},
    ],
}

# Nanbeige-16B-Base-32K loaded through the Hugging Face causal-LM wrapper.
models = [
    {
        "type": HuggingFaceCausalLM,
        "abbr": "nanbeige-16b-base-32k-hf",
        # Model and tokenizer come from the same repository id.
        "path": "Nanbeige/Nanbeige-16B-Base-32K",
        "tokenizer_path": "Nanbeige/Nanbeige-16B-Base-32K",
        "model_kwargs": {
            "device_map": "auto",
            "trust_remote_code": True,
            "torch_dtype": "auto",
        },
        "tokenizer_kwargs": {
            "padding_side": "right",
            "truncation_side": "left",
            "trust_remote_code": True,
            "use_fast": False,
        },
        "meta_template": _meta_template,
        "batch_padding": False,
        "max_out_len": 1024,
        "max_seq_len": 8192,
        "batch_size": 8,
        "run_cfg": {"num_gpus": 1, "num_procs": 1},
    },
]
|
34
configs/models/nanbeige/hf_nanbeige_16b_chat.py
Normal file
34
configs/models/nanbeige/hf_nanbeige_16b_chat.py
Normal file
@ -0,0 +1,34 @@
|
||||
from opencompass.models import HuggingFaceCausalLM
|
||||
|
||||
# Chat markup: HUMAN turns are wrapped as "### Human: \n ... \n\n"; BOT turns
# start with "### Assistant: " and end with "</s>". The BOT turn is the one
# flagged with generate=True.
_meta_template = {
    "round": [
        {"role": "HUMAN", "begin": "### Human: \n", "end": "\n\n"},
        {
            "role": "BOT",
            "begin": "### Assistant: ",
            "end": "</s>",
            "generate": True,
        },
    ],
}

# Nanbeige-16B-Chat loaded through the Hugging Face causal-LM wrapper.
models = [
    {
        "type": HuggingFaceCausalLM,
        "abbr": "nanbeige-16b-chat-hf",
        # Model and tokenizer come from the same repository id.
        "path": "Nanbeige/Nanbeige-16B-Chat",
        "tokenizer_path": "Nanbeige/Nanbeige-16B-Chat",
        "model_kwargs": {
            "device_map": "auto",
            "trust_remote_code": True,
            "torch_dtype": "auto",
        },
        "tokenizer_kwargs": {
            "padding_side": "right",
            "truncation_side": "left",
            "trust_remote_code": True,
            "use_fast": False,
        },
        "meta_template": _meta_template,
        "batch_padding": False,
        "max_out_len": 1024,
        "max_seq_len": 4096,
        "batch_size": 8,
        "run_cfg": {"num_gpus": 1, "num_procs": 1},
    },
]
|
34
configs/models/nanbeige/hf_nanbeige_16b_chat_32k.py
Normal file
34
configs/models/nanbeige/hf_nanbeige_16b_chat_32k.py
Normal file
@ -0,0 +1,34 @@
|
||||
from opencompass.models import HuggingFaceCausalLM
|
||||
|
||||
# Prompt template for one dialogue round: the HUMAN turn is wrapped in
# '### Human: \n' ... '\n\n', the BOT turn in '### Assistant: ' ... '</s>'.
_meta_template = {
    'round': [
        {'role': 'HUMAN', 'begin': '### Human: \n', 'end': '\n\n'},
        {
            'role': 'BOT',
            'begin': '### Assistant: ',
            'end': '</s>',
            'generate': True,
        },
    ],
}
|
||||
|
||||
# Model list for this config: a single entry for the
# Nanbeige/Nanbeige-16B-Chat-32K checkpoint.
models = [
    {
        'type': HuggingFaceCausalLM,
        'abbr': 'nanbeige-16b-chat-32k-hf',
        'path': 'Nanbeige/Nanbeige-16B-Chat-32K',
        'tokenizer_path': 'Nanbeige/Nanbeige-16B-Chat-32K',
        # Keyword arguments for the model.
        'model_kwargs': {
            'device_map': 'auto',
            'trust_remote_code': True,
            'torch_dtype': 'auto',
        },
        # Keyword arguments for the tokenizer.
        'tokenizer_kwargs': {
            'padding_side': 'right',
            'truncation_side': 'left',
            'trust_remote_code': True,
            'use_fast': False,
        },
        'meta_template': _meta_template,
        'batch_padding': False,
        'max_out_len': 1024,
        'max_seq_len': 8192,
        'batch_size': 8,
        'run_cfg': {'num_gpus': 1, 'num_procs': 1},
    },
]
|
33
configs/models/others/hf_dolphin_21_mistral_7b.py
Normal file
33
configs/models/others/hf_dolphin_21_mistral_7b.py
Normal file
@ -0,0 +1,33 @@
|
||||
from opencompass.models import HuggingFaceCausalLM
|
||||
|
||||
|
||||
# Prompt template for one dialogue round, using <|im_start|> / <|im_end|>
# markers around the user and assistant turns.
_meta_template = {
    'round': [
        {
            'role': 'HUMAN',
            'begin': '<|im_start|>user\n',
            'end': '<|im_end|>\n',
        },
        {
            'role': 'BOT',
            'begin': '<|im_start|>assistant\n',
            'end': '<|im_end|>\n',
            'generate': True,
        },
    ],
    'eos_token_id': 2,
}
|
||||
|
||||
# Model list for this config: a single entry for the
# ehartford/dolphin-2.2.1-mistral-7b checkpoint.
models = [
    {
        'abbr': 'dolphin-2.2.1-mistral-7b-hf',
        'type': HuggingFaceCausalLM,
        'path': 'ehartford/dolphin-2.2.1-mistral-7b',
        'tokenizer_path': 'ehartford/dolphin-2.2.1-mistral-7b',
        # Keyword arguments for the model.
        'model_kwargs': {
            'device_map': 'auto',
            'trust_remote_code': True,
        },
        # Keyword arguments for the tokenizer.
        'tokenizer_kwargs': {
            'padding_side': 'left',
            'truncation_side': 'left',
            'trust_remote_code': True,
        },
        'meta_template': _meta_template,
        'max_out_len': 100,
        'max_seq_len': 2048,
        'batch_size': 8,
        'run_cfg': {'num_gpus': 1, 'num_procs': 1},
    },
]
|
33
configs/models/others/hf_fashiongpt_70b_v11.py
Normal file
33
configs/models/others/hf_fashiongpt_70b_v11.py
Normal file
@ -0,0 +1,33 @@
|
||||
from opencompass.models import HuggingFaceCausalLM
|
||||
|
||||
|
||||
# Prompt template for one dialogue round: '### User:\n' opens the HUMAN
# turn and '### Assistant:\n' opens the BOT turn (which has no end marker).
_meta_template = {
    'round': [
        {'role': 'HUMAN', 'begin': '### User:\n', 'end': '\n'},
        {'role': 'BOT', 'begin': '### Assistant:\n', 'generate': True},
    ],
    'eos_token_id': 2,
}
|
||||
|
||||
# Model list for this config: a single entry for the
# ICBU-NPU/FashionGPT-70B-V1.1 checkpoint, configured with 8 GPUs.
models = [
    {
        'abbr': 'fashiongpt-70b-v11-hf',
        'type': HuggingFaceCausalLM,
        'path': 'ICBU-NPU/FashionGPT-70B-V1.1',
        'tokenizer_path': 'ICBU-NPU/FashionGPT-70B-V1.1',
        # Keyword arguments for the model.
        'model_kwargs': {
            'device_map': 'auto',
            'trust_remote_code': True,
        },
        # Keyword arguments for the tokenizer.
        'tokenizer_kwargs': {
            'padding_side': 'left',
            'truncation_side': 'left',
            'trust_remote_code': True,
        },
        'meta_template': _meta_template,
        'max_out_len': 100,
        'max_seq_len': 2048,
        'batch_size': 8,
        'run_cfg': {'num_gpus': 8, 'num_procs': 1},
    },
]
|
34
configs/models/others/hf_orionstar_yi_34b_chat.py
Normal file
34
configs/models/others/hf_orionstar_yi_34b_chat.py
Normal file
@ -0,0 +1,34 @@
|
||||
from opencompass.models import HuggingFaceCausalLM
|
||||
|
||||
|
||||
# Prompt template: the whole prompt starts with '<|startoftext|>'; each
# round is 'Human: ...\n\n' followed by 'Assistant: <|endoftext|>...'
# terminated by another '<|endoftext|>'.
_meta_template = {
    'begin': '<|startoftext|>',
    'round': [
        {'role': 'HUMAN', 'begin': 'Human: ', 'end': '\n\n'},
        {
            'role': 'BOT',
            'begin': 'Assistant: <|endoftext|>',
            'end': '<|endoftext|>',
            'generate': True,
        },
    ],
    'eos_token_id': 2,
}
|
||||
|
||||
# Model list for this config: a single entry for the
# OrionStarAI/OrionStar-Yi-34B-Chat checkpoint, configured with 4 GPUs.
models = [
    {
        'abbr': 'orionstar-yi-34b-chat-hf',
        'type': HuggingFaceCausalLM,
        'path': 'OrionStarAI/OrionStar-Yi-34B-Chat',
        'tokenizer_path': 'OrionStarAI/OrionStar-Yi-34B-Chat',
        # Keyword arguments for the model.
        'model_kwargs': {
            'device_map': 'auto',
            'trust_remote_code': True,
        },
        # Keyword arguments for the tokenizer.
        'tokenizer_kwargs': {
            'padding_side': 'left',
            'truncation_side': 'left',
            'trust_remote_code': True,
        },
        'meta_template': _meta_template,
        'max_out_len': 100,
        'max_seq_len': 2048,
        'batch_size': 8,
        'run_cfg': {'num_gpus': 4, 'num_procs': 1},
    },
]
|
5
configs/summarizers/groups/ds1000.py
Normal file
5
configs/summarizers/groups/ds1000.py
Normal file
@ -0,0 +1,5 @@
|
||||
# Summary groups for DS-1000: one group named 'ds1000' whose subsets are
# the per-library datasets, each prefixed with 'ds1000_'.
ds1000_summary_groups = []

_ds1000_all = [
    f'ds1000_{lib}' for lib in (
        'Pandas',
        'Numpy',
        'Tensorflow',
        'Scipy',
        'Sklearn',
        'Pytorch',
        'Matplotlib',
    )
]
ds1000_summary_groups.append(dict(name='ds1000', subsets=_ds1000_all))
|
@ -1,15 +1,19 @@
|
||||
import configparser
|
||||
import importlib
|
||||
import json
|
||||
import os
|
||||
import os.path as osp
|
||||
import pickle
|
||||
import re
|
||||
import shutil
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import threading
|
||||
from concurrent.futures import ProcessPoolExecutor
|
||||
from pathlib import Path
|
||||
from shutil import copyfile
|
||||
from subprocess import PIPE, Popen
|
||||
from typing import Optional, Union
|
||||
|
||||
@ -20,6 +24,11 @@ from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS
|
||||
|
||||
from .base import BaseDataset
|
||||
|
||||
# Library names accepted by DS1000ServiceEvaluator's `lib` argument.
_LIBRARY_NAME_LIST = [
    'Pandas',
    'Numpy',
    'Tensorflow',
    'Scipy',
    'Sklearn',
    'Pytorch',
    'Matplotlib',
]
|
||||
|
||||
|
||||
@LOAD_DATASET.register_module()
|
||||
class DS1000Dataset(BaseDataset):
|
||||
@ -323,3 +332,98 @@ def import_source_file(fname, modname):
|
||||
except FileNotFoundError as e:
|
||||
raise ImportError(f'{e.strerror}: {fname}') from e
|
||||
return module
|
||||
|
||||
|
||||
class DS1000ServiceEvaluator(BaseEvaluator):
    """Evaluator for ds1000 eval by using a service.

    Before you use this Evaluator, launch a code eval service according to:
    https://opencompass.readthedocs.io/en/latest/advanced_guides/code_eval_service.html

    Args:
        lib (str): The library to be evaluated.
        ip_address (str): The IP Address of DS1000 code evaluate service.
            Defaults to 'localhost'.
        port (int): The port of DS1000 code evaluate service.
            Defaults to 5000.
        timeout (int): Maximum wait time (in seconds) when accessing the
            service. Defaults to 180.
    """

    def __init__(self,
                 lib: str,
                 ip_address='localhost',
                 port=5000,
                 timeout=180) -> None:
        assert lib in _LIBRARY_NAME_LIST, (
            f' lib must be in {_LIBRARY_NAME_LIST}')
        self.lib = lib
        self.ip_address = ip_address
        self.port = port
        self.timeout = timeout
        super().__init__()

    def score(self, predictions, references):
        """Send predictions to the code eval service and return its result.

        The predictions and references are paired by position, dumped to a
        temporary JSON file and uploaded to the service.

        Args:
            predictions (list): Model predictions.
            references (list): References, same length as ``predictions``.

        Returns:
            The result reported by the service (a JSON string is parsed
            before being returned).

        Raises:
            Exception: If the service call fails. Before raising, the
                uploaded JSON file is copied to
                ``outputs/ds1000_{lib}.json`` so it can be evaluated by hand.
        """
        assert len(predictions) == len(references)
        processed_predictions = {
            str(i): {
                'prediction': pred,
                'gold': gold
            }
            for i, (pred, gold) in enumerate(zip(predictions, references))
        }

        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_out_path = osp.join(tmp_dir, f'ds1000_{self.lib}.json')
            with open(tmp_out_path, 'w', encoding='utf-8') as json_file:
                json.dump(processed_predictions,
                          json_file,
                          indent=4,
                          ensure_ascii=False)

            succeed, output = self._code_eval_service(file_path=tmp_out_path)
            if succeed:
                if isinstance(output, str):
                    return json.loads(output)
                elif isinstance(output, dict):
                    return output
            else:
                # The copy must happen while the temporary directory still
                # exists. Create the target directory first so a missing
                # 'outputs' folder does not hide the real service error.
                os.makedirs('outputs', exist_ok=True)
                result_file_path = os.path.join('outputs',
                                                f'ds1000_{self.lib}.json')
                copyfile(tmp_out_path, result_file_path)
                ref_url = 'https://opencompass.readthedocs.io/en/latest/advanced_guides/code_eval_service.html'  # noqa
                raise Exception(
                    'Call CodeEvalService Error in `DS1000ServiceEvaluator`, '
                    'The results have been saved in path '
                    f"'{result_file_path}'. You need to check that your "
                    'code evaluate service is launched and the network to '
                    'service is connected, you can also get results directly '
                    f'by using `curl` command refer to {ref_url}.'
                    f'\nError Information: {output}')

    def _code_eval_service(self, file_path: str) -> tuple:
        """Access the code eval service.

        Args:
            file_path (str): The file path to the file to be evaluated.

        Returns:
            tuple[bool, str]: Whether the access is successful and the output.
        """
        try:
            exec_result = subprocess.run([
                'curl', '-X', 'POST', '-F', f'file=@{file_path}',
                f'{self.ip_address}:{self.port}/evaluate'
            ],
                                         timeout=self.timeout,
                                         capture_output=True)
        except subprocess.TimeoutExpired as e:
            # Report a timeout as an ordinary failure so the caller can still
            # save the predictions and raise its descriptive error.
            return False, (f'Request to code eval service timed out after '
                           f'{self.timeout} seconds: {e}')

        if exec_result.returncode == 0:
            stdout = exec_result.stdout.decode('utf-8')
            # A successful reply is a JSON string that itself holds a JSON
            # object, i.e. it looks like "{...: ...}".
            if re.match(r'"{.*:.*}"', stdout):
                return True, json.loads(stdout)

        # Prefer stderr for the error text and fall back to stdout. Decode
        # leniently so the message is always a str.
        raw_err = exec_result.stderr or exec_result.stdout
        return False, raw_err.decode('utf-8', errors='replace')
|
||||
|
@ -93,6 +93,7 @@ def humaneval_postprocess(text: str) -> str:
|
||||
if def_idx != -1:
|
||||
text = text[max(text.find('\n', def_idx) + 1, 0):]
|
||||
text = text.split('\n\n')[0]
|
||||
text = text.lstrip('\n')
|
||||
if text.strip().startswith('def'):
|
||||
text = '\n'.join(text.split('\n')[1:])
|
||||
if not text.startswith(' '):
|
||||
|
@ -127,7 +127,9 @@ class MBPPEvaluator(BaseEvaluator):
|
||||
predictions = [self._process_answer(pred) for pred in predictions]
|
||||
|
||||
result = {'pass': 0, 'timeout': 0, 'failed': 0, 'wrong_answer': 0}
|
||||
for test_case, pred in zip(references, predictions):
|
||||
details = {}
|
||||
for index, (test_case, pred) in enumerate(zip(references,
|
||||
predictions)):
|
||||
programs = self._process_test(test_case, pred)
|
||||
try:
|
||||
# Add exec globals to prevent the exec to raise
|
||||
@ -136,15 +138,18 @@ class MBPPEvaluator(BaseEvaluator):
|
||||
with swallow_io():
|
||||
with time_limit(2):
|
||||
exec(programs, exec_globals)
|
||||
result['pass'] += 1
|
||||
r = 'pass'
|
||||
except TimeOutException:
|
||||
result['timeout'] += 1
|
||||
r = 'timeout'
|
||||
except AssertionError:
|
||||
result['wrong_answer'] += 1
|
||||
r = 'wrong_answer'
|
||||
except BaseException:
|
||||
result['failed'] += 1
|
||||
r = 'failed'
|
||||
result[r] += 1
|
||||
details[str(index)] = {'programs': programs, 'result': r}
|
||||
|
||||
result['score'] = result['pass'] / len(predictions) * 100
|
||||
result['details'] = details
|
||||
return result
|
||||
|
||||
def _process_answer(self, text):
|
||||
|
@ -147,26 +147,26 @@ class DefaultSummarizer:
|
||||
if all(isinstance(dataset_abbr, (list, tuple)) for dataset_abbr in sg['subsets']):
|
||||
group_metrics = [default_metric]
|
||||
for dataset_abbr, metric in sg['subsets']:
|
||||
scores.setdefault(default_metric, []).append(parsed_results[model_abbr][dataset_abbr][metric])
|
||||
scores.setdefault(default_metric, {})[dataset_abbr] = parsed_results[model_abbr][dataset_abbr][metric]
|
||||
eval_modes.append(dataset_eval_mode.get(dataset_abbr, 'unknown'))
|
||||
else:
|
||||
group_metrics = list(functools.reduce(lambda a, b: a & b, [set(dataset_metrics[dataset_abbr]) for dataset_abbr in sg['subsets']]))
|
||||
if len(group_metrics) > 1:
|
||||
for metric in group_metrics:
|
||||
for dataset_abbr in sg['subsets']:
|
||||
scores.setdefault(metric, []).append(parsed_results[model_abbr][dataset_abbr][metric])
|
||||
scores.setdefault(metric, {})[dataset_abbr] = parsed_results[model_abbr][dataset_abbr][metric]
|
||||
eval_modes.append(dataset_eval_mode.get(sg['subsets'][0], 'unknown'))
|
||||
else:
|
||||
group_metrics = [default_metric]
|
||||
for dataset_abbr in sg['subsets']:
|
||||
metric = dataset_metrics[dataset_abbr][0]
|
||||
scores.setdefault(default_metric, []).append(parsed_results[model_abbr][dataset_abbr][metric])
|
||||
scores.setdefault(default_metric, {})[dataset_abbr] = parsed_results[model_abbr][dataset_abbr][metric]
|
||||
eval_modes.append(dataset_eval_mode.get(dataset_abbr, 'unknown'))
|
||||
|
||||
result = {}
|
||||
for metric in scores:
|
||||
if default_metric == 'standard_deviation':
|
||||
avg = sum(scores[metric]) / len(scores[metric])
|
||||
avg = sum(scores[metric].values()) / len(scores[metric])
|
||||
variance = sum((k - avg) ** 2 for k in scores[metric]) / len(scores[metric])
|
||||
scores[metric] = result[metric] = math.sqrt(variance)
|
||||
else:
|
||||
@ -174,7 +174,7 @@ class DefaultSummarizer:
|
||||
numerator = sum(scores[metric][k] * sg['weights'][k] for k in sg['weights'])
|
||||
denominator = sum(sg['weights'].values())
|
||||
else:
|
||||
numerator = sum(scores[metric])
|
||||
numerator = sum(scores[metric].values())
|
||||
denominator = len(scores[metric])
|
||||
scores[metric] = result[metric] = numerator / denominator
|
||||
eval_modes = list(set(eval_modes))
|
||||
|
@ -51,19 +51,53 @@ def first_capital_postprocess(text: str) -> str:
|
||||
def first_option_postprocess(text: str, options: str) -> str:
|
||||
"""Find first valid option for text."""
|
||||
|
||||
# yapf: disable
|
||||
# flake8: noqa: W605
|
||||
patterns = [
|
||||
f'[Tt]he answer is [{options}]',
|
||||
f'[Tt]he correct answer\s?(?:option)?\s?is [{options}]', # noqa
|
||||
f'答案(?:选项)?是(.*?)[{options}]',
|
||||
f'答案(?:选项)?为(.*?)[{options}]',
|
||||
f'答案(?:选项)?选(.*?)[{options}]',
|
||||
f'选项[{options}]是?正确',
|
||||
f'选项[{options}]为?正确',
|
||||
f'固选(.*?)[{options}]',
|
||||
f'答案应该是(.*?)[{options}]',
|
||||
f'(\s|^)[{options}][\s。,,\.$]', # noqa
|
||||
f'答案是?\s?([{options}])',
|
||||
f'答案是?\s?:([{options}])',
|
||||
f'答案是?\s?:([{options}])',
|
||||
f'答案应该?是\s?([{options}])',
|
||||
f'答案应该?选\s?([{options}])',
|
||||
f'答案为\s?([{options}])',
|
||||
f'答案选\s?([{options}])',
|
||||
f'选择?\s?([{options}])',
|
||||
f'只有选?项?\s?([{options}])\s?是?对',
|
||||
f'只有选?项?\s?([{options}])\s?是?错',
|
||||
f'只有选?项?\s?([{options}])\s?不?正确',
|
||||
f'只有选?项?\s?([{options}])\s?错误',
|
||||
f'说法不?对选?项?的?是\s?([{options}])',
|
||||
f'说法不?正确选?项?的?是\s?([{options}])',
|
||||
f'说法错误选?项?的?是\s?([{options}])',
|
||||
f'([{options}])\s?是正确的',
|
||||
f'([{options}])\s?是正确答案',
|
||||
f'选项\s?([{options}])\s?正确',
|
||||
f'所以答\s?([{options}])',
|
||||
f'1.\s?([{options}])[.。$]?$',
|
||||
f'所以\s?([{options}][.。$]?$)',
|
||||
f'所有\s?([{options}][.。$]?$)',
|
||||
f'[\s,::,]([{options}])[。,,\.]?$',
|
||||
f'[\s,,::][故即]([{options}])[。\.]?$',
|
||||
f'[\s,,::]因此([{options}])[。\.]?$',
|
||||
f'[是为。]\s?([{options}])[。\.]?$',
|
||||
f'因此\s?([{options}])[。\.]?$',
|
||||
f'显然\s?([{options}])[。\.]?$',
|
||||
f'1.\s?(.*?)$',
|
||||
f'答案是\s?(\S+)(?:。|$)',
|
||||
f'答案应该是\s?(\S+)(?:。|$)',
|
||||
f'答案为\s?(\S+)(?:。|$)',
|
||||
f'(\s|^)[{options}][\s。,,::\.$]',
|
||||
f'[Tt]he answer is ([{options}])',
|
||||
f'[Tt]he answer is option ([{options}])',
|
||||
f'[Tt]he correct answer is ([{options}])',
|
||||
f'[Tt]he correct answer is option ([{options}])',
|
||||
f'[Tt]he answer to the question is ([{options}])',
|
||||
f'([{options}]):',
|
||||
f'(^|\s)[{options}](\s|$)',
|
||||
f'[{options}]',
|
||||
]
|
||||
# flake8: noqa
|
||||
# yapf: enable
|
||||
|
||||
regexes = [re.compile(pattern) for pattern in patterns]
|
||||
for regex in regexes:
|
||||
|
@ -84,20 +84,17 @@ def print_prompts(model_cfg, dataset_cfg, count=1):
|
||||
if infer_cfg.inferencer.type == PPLInferencer:
|
||||
labels = retriever.get_labels(ice_template=ice_template,
|
||||
prompt_template=prompt_template)
|
||||
ice = [
|
||||
retriever.generate_ice(ice_idx_list[_idx],
|
||||
ice_template=ice_template)
|
||||
for _idx in range(len(ice_idx_list))
|
||||
]
|
||||
ice = retriever.generate_ice(ice_idx_list[idx],
|
||||
ice_template=ice_template)
|
||||
print('-' * 100)
|
||||
print('ICE Template:')
|
||||
print('-' * 100)
|
||||
print(ice[0])
|
||||
print(ice)
|
||||
print('-' * 100)
|
||||
for label in labels:
|
||||
prompt = retriever.generate_label_prompt(
|
||||
idx,
|
||||
ice[idx],
|
||||
ice,
|
||||
label,
|
||||
ice_template=ice_template,
|
||||
prompt_template=prompt_template,
|
||||
@ -111,11 +108,11 @@ def print_prompts(model_cfg, dataset_cfg, count=1):
|
||||
print(f'Truncating ice {num_ice} -> {num_ice - 1}',
|
||||
f'Number of tokens: {prompt_token_num} -> ...')
|
||||
ice_idx_list[idx] = ice_idx_list[idx][:-1]
|
||||
ice[idx] = retriever.generate_ice(
|
||||
ice_idx_list[idx], ice_template=ice_template)
|
||||
ice = retriever.generate_ice(ice_idx_list[idx],
|
||||
ice_template=ice_template)
|
||||
prompt = retriever.generate_label_prompt(
|
||||
idx,
|
||||
ice[idx],
|
||||
ice,
|
||||
label,
|
||||
ice_template=ice_template,
|
||||
prompt_template=prompt_template)
|
||||
|
Loading…
Reference in New Issue
Block a user