[Sync] some renaming (#641)

Author: Fengzhe Zhou, 2023-11-27 16:06:49 +08:00 (committed by GitHub)
Parent: 68c4c1ef86
Commit: 9083dea683
28 changed files with 1102 additions and 564 deletions

View File

@@ -6,139 +6,58 @@ from opencompass.datasets import CEvalDataset
from opencompass.utils.text_postprocessors import first_capital_postprocess
ceval_subject_mapping = {
"computer_network":
["Computer Network", "\u8ba1\u7b97\u673a\u7f51\u7edc", "STEM"],
"operating_system":
["Operating System", "\u64cd\u4f5c\u7cfb\u7edf", "STEM"],
"computer_architecture":
["Computer Architecture", "\u8ba1\u7b97\u673a\u7ec4\u6210", "STEM"],
"college_programming":
["College Programming", "\u5927\u5b66\u7f16\u7a0b", "STEM"],
"college_physics": ["College Physics", "\u5927\u5b66\u7269\u7406", "STEM"],
"college_chemistry":
["College Chemistry", "\u5927\u5b66\u5316\u5b66", "STEM"],
"advanced_mathematics":
["Advanced Mathematics", "\u9ad8\u7b49\u6570\u5b66", "STEM"],
"probability_and_statistics":
["Probability and Statistics", "\u6982\u7387\u7edf\u8ba1", "STEM"],
"discrete_mathematics":
["Discrete Mathematics", "\u79bb\u6563\u6570\u5b66", "STEM"],
"electrical_engineer": [
"Electrical Engineer", "\u6ce8\u518c\u7535\u6c14\u5de5\u7a0b\u5e08",
"STEM"
],
"metrology_engineer":
["Metrology Engineer", "\u6ce8\u518c\u8ba1\u91cf\u5e08", "STEM"],
"high_school_mathematics":
["High School Mathematics", "\u9ad8\u4e2d\u6570\u5b66", "STEM"],
"high_school_physics":
["High School Physics", "\u9ad8\u4e2d\u7269\u7406", "STEM"],
"high_school_chemistry":
["High School Chemistry", "\u9ad8\u4e2d\u5316\u5b66", "STEM"],
"high_school_biology": [
"High School Biology", "\u9ad8\u4e2d\u751f\u7269", "STEM"
],
"middle_school_mathematics": [
"Middle School Mathematics", "\u521d\u4e2d\u6570\u5b66", "STEM"
],
"middle_school_biology": [
"Middle School Biology", "\u521d\u4e2d\u751f\u7269", "STEM"
],
"middle_school_physics": [
"Middle School Physics", "\u521d\u4e2d\u7269\u7406", "STEM"
],
"middle_school_chemistry": [
"Middle School Chemistry", "\u521d\u4e2d\u5316\u5b66", "STEM"
],
"veterinary_medicine": [
"Veterinary Medicine", "\u517d\u533b\u5b66", "STEM"
],
"college_economics": [
"College Economics", "\u5927\u5b66\u7ecf\u6d4e\u5b66", "Social Science"
],
"business_administration": [
"Business Administration", "\u5de5\u5546\u7ba1\u7406", "Social Science"
],
"marxism": [
"Marxism", "\u9a6c\u514b\u601d\u4e3b\u4e49\u57fa\u672c\u539f\u7406",
"Social Science"
],
"mao_zedong_thought": [
"Mao Zedong Thought",
"\u6bdb\u6cfd\u4e1c\u601d\u60f3\u548c\u4e2d\u56fd\u7279\u8272\u793e\u4f1a\u4e3b\u4e49\u7406\u8bba\u4f53\u7cfb\u6982\u8bba",
"Social Science"
],
"education_science": [
"Education Science", "\u6559\u80b2\u5b66", "Social Science"
],
"teacher_qualification": [
"Teacher Qualification", "\u6559\u5e08\u8d44\u683c", "Social Science"
],
"high_school_politics": [
"High School Politics", "\u9ad8\u4e2d\u653f\u6cbb", "Social Science"
],
"high_school_geography": [
"High School Geography", "\u9ad8\u4e2d\u5730\u7406", "Social Science"
],
"middle_school_politics": [
"Middle School Politics", "\u521d\u4e2d\u653f\u6cbb", "Social Science"
],
"middle_school_geography": [
"Middle School Geography", "\u521d\u4e2d\u5730\u7406", "Social Science"
],
"modern_chinese_history":
["Modern Chinese History", "\u8fd1\u4ee3\u53f2\u7eb2\u8981", "Humanities"],
"ideological_and_moral_cultivation": [
"Ideological and Moral Cultivation",
"\u601d\u60f3\u9053\u5fb7\u4fee\u517b\u4e0e\u6cd5\u5f8b\u57fa\u7840",
"Humanities"
],
"logic": ["Logic", "\u903b\u8f91\u5b66", "Humanities"],
"law": ["Law", "\u6cd5\u5b66", "Humanities"],
"chinese_language_and_literature": [
"Chinese Language and Literature",
"\u4e2d\u56fd\u8bed\u8a00\u6587\u5b66", "Humanities"
],
"art_studies": ["Art Studies", "\u827a\u672f\u5b66", "Humanities"],
"professional_tour_guide": [
"Professional Tour Guide", "\u5bfc\u6e38\u8d44\u683c", "Humanities"
],
"legal_professional": [
"Legal Professional", "\u6cd5\u5f8b\u804c\u4e1a\u8d44\u683c",
"Humanities"
],
"high_school_chinese": [
"High School Chinese", "\u9ad8\u4e2d\u8bed\u6587", "Humanities"
],
"high_school_history": [
"High School History", "\u9ad8\u4e2d\u5386\u53f2", "Humanities"
],
"middle_school_history": [
"Middle School History", "\u521d\u4e2d\u5386\u53f2", "Humanities"
],
"civil_servant": ["Civil Servant", "\u516c\u52a1\u5458", "Other"],
"sports_science": ["Sports Science", "\u4f53\u80b2\u5b66", "Other"],
"plant_protection": [
"Plant Protection", "\u690d\u7269\u4fdd\u62a4", "Other"
],
"basic_medicine": ["Basic Medicine", "\u57fa\u7840\u533b\u5b66", "Other"],
"clinical_medicine": [
"Clinical Medicine", "\u4e34\u5e8a\u533b\u5b66", "Other"
],
"urban_and_rural_planner": [
"Urban and Rural Planner",
"\u6ce8\u518c\u57ce\u4e61\u89c4\u5212\u5e08", "Other"
],
"accountant": ["Accountant", "\u6ce8\u518c\u4f1a\u8ba1\u5e08", "Other"],
"fire_engineer": [
"Fire Engineer", "\u6ce8\u518c\u6d88\u9632\u5de5\u7a0b\u5e08", "Other"
],
"environmental_impact_assessment_engineer": [
"Environmental Impact Assessment Engineer",
"\u73af\u5883\u5f71\u54cd\u8bc4\u4ef7\u5de5\u7a0b\u5e08", "Other"
],
"tax_accountant": ["Tax Accountant", "\u7a0e\u52a1\u5e08", "Other"],
"physician": ["Physician", "\u533b\u5e08\u8d44\u683c", "Other"]
'computer_network': ['Computer Network', '计算机网络', 'STEM'],
'operating_system': ['Operating System', '操作系统', 'STEM'],
'computer_architecture': ['Computer Architecture', '计算机组成', 'STEM'],
'college_programming': ['College Programming', '大学编程', 'STEM'],
'college_physics': ['College Physics', '大学物理', 'STEM'],
'college_chemistry': ['College Chemistry', '大学化学', 'STEM'],
'advanced_mathematics': ['Advanced Mathematics', '高等数学', 'STEM'],
'probability_and_statistics': ['Probability and Statistics', '概率统计', 'STEM'],
'discrete_mathematics': ['Discrete Mathematics', '离散数学', 'STEM'],
'electrical_engineer': ['Electrical Engineer', '注册电气工程师', 'STEM'],
'metrology_engineer': ['Metrology Engineer', '注册计量师', 'STEM'],
'high_school_mathematics': ['High School Mathematics', '高中数学', 'STEM'],
'high_school_physics': ['High School Physics', '高中物理', 'STEM'],
'high_school_chemistry': ['High School Chemistry', '高中化学', 'STEM'],
'high_school_biology': ['High School Biology', '高中生物', 'STEM'],
'middle_school_mathematics': ['Middle School Mathematics', '初中数学', 'STEM'],
'middle_school_biology': ['Middle School Biology', '初中生物', 'STEM'],
'middle_school_physics': ['Middle School Physics', '初中物理', 'STEM'],
'middle_school_chemistry': ['Middle School Chemistry', '初中化学', 'STEM'],
'veterinary_medicine': ['Veterinary Medicine', '兽医学', 'STEM'],
'college_economics': ['College Economics', '大学经济学', 'Social Science'],
'business_administration': ['Business Administration', '工商管理', 'Social Science'],
'marxism': ['Marxism', '马克思主义基本原理', 'Social Science'],
'mao_zedong_thought': ['Mao Zedong Thought', '毛泽东思想和中国特色社会主义理论体系概论', 'Social Science'],
'education_science': ['Education Science', '教育学', 'Social Science'],
'teacher_qualification': ['Teacher Qualification', '教师资格', 'Social Science'],
'high_school_politics': ['High School Politics', '高中政治', 'Social Science'],
'high_school_geography': ['High School Geography', '高中地理', 'Social Science'],
'middle_school_politics': ['Middle School Politics', '初中政治', 'Social Science'],
'middle_school_geography': ['Middle School Geography', '初中地理', 'Social Science'],
'modern_chinese_history': ['Modern Chinese History', '近代史纲要', 'Humanities'],
'ideological_and_moral_cultivation': ['Ideological and Moral Cultivation', '思想道德修养与法律基础', 'Humanities'],
'logic': ['Logic', '逻辑学', 'Humanities'],
'law': ['Law', '法学', 'Humanities'],
'chinese_language_and_literature': ['Chinese Language and Literature', '中国语言文学', 'Humanities'],
'art_studies': ['Art Studies', '艺术学', 'Humanities'],
'professional_tour_guide': ['Professional Tour Guide', '导游资格', 'Humanities'],
'legal_professional': ['Legal Professional', '法律职业资格', 'Humanities'],
'high_school_chinese': ['High School Chinese', '高中语文', 'Humanities'],
'high_school_history': ['High School History', '高中历史', 'Humanities'],
'middle_school_history': ['Middle School History', '初中历史', 'Humanities'],
'civil_servant': ['Civil Servant', '公务员', 'Other'],
'sports_science': ['Sports Science', '体育学', 'Other'],
'plant_protection': ['Plant Protection', '植物保护', 'Other'],
'basic_medicine': ['Basic Medicine', '基础医学', 'Other'],
'clinical_medicine': ['Clinical Medicine', '临床医学', 'Other'],
'urban_and_rural_planner': ['Urban and Rural Planner', '注册城乡规划师', 'Other'],
'accountant': ['Accountant', '注册会计师', 'Other'],
'fire_engineer': ['Fire Engineer', '注册消防工程师', 'Other'],
'environmental_impact_assessment_engineer': ['Environmental Impact Assessment Engineer', '环境影响评价工程师', 'Other'],
'tax_accountant': ['Tax Accountant', '税务师', 'Other'],
'physician': ['Physician', '医师资格', 'Other'],
}
ceval_all_sets = list(ceval_subject_mapping.keys())
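Each entry in ceval_subject_mapping is an [english_name, chinese_name, category] triple; the per-subject configs later in this commit read the Chinese name by index (ceval_subject_mapping[_name][1]). A minimal standalone sketch of that access pattern, using a hypothetical two-entry mapping:

mapping = {
    'computer_network': ['Computer Network', '计算机网络', 'STEM'],
    'law': ['Law', '法学', 'Humanities'],
}
for name, (en_name, ch_name, category) in mapping.items():
    # ch_name feeds the Chinese prompt template; category drives summary grouping
    print(f'{name}: {en_name} / {ch_name} ({category})')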

View File

@@ -6,139 +6,58 @@ from opencompass.datasets import CEvalDataset
from opencompass.utils.text_postprocessors import first_capital_postprocess
ceval_subject_mapping = {
"computer_network":
["Computer Network", "\u8ba1\u7b97\u673a\u7f51\u7edc", "STEM"],
"operating_system":
["Operating System", "\u64cd\u4f5c\u7cfb\u7edf", "STEM"],
"computer_architecture":
["Computer Architecture", "\u8ba1\u7b97\u673a\u7ec4\u6210", "STEM"],
"college_programming":
["College Programming", "\u5927\u5b66\u7f16\u7a0b", "STEM"],
"college_physics": ["College Physics", "\u5927\u5b66\u7269\u7406", "STEM"],
"college_chemistry":
["College Chemistry", "\u5927\u5b66\u5316\u5b66", "STEM"],
"advanced_mathematics":
["Advanced Mathematics", "\u9ad8\u7b49\u6570\u5b66", "STEM"],
"probability_and_statistics":
["Probability and Statistics", "\u6982\u7387\u7edf\u8ba1", "STEM"],
"discrete_mathematics":
["Discrete Mathematics", "\u79bb\u6563\u6570\u5b66", "STEM"],
"electrical_engineer": [
"Electrical Engineer", "\u6ce8\u518c\u7535\u6c14\u5de5\u7a0b\u5e08",
"STEM"
],
"metrology_engineer":
["Metrology Engineer", "\u6ce8\u518c\u8ba1\u91cf\u5e08", "STEM"],
"high_school_mathematics":
["High School Mathematics", "\u9ad8\u4e2d\u6570\u5b66", "STEM"],
"high_school_physics":
["High School Physics", "\u9ad8\u4e2d\u7269\u7406", "STEM"],
"high_school_chemistry":
["High School Chemistry", "\u9ad8\u4e2d\u5316\u5b66", "STEM"],
"high_school_biology": [
"High School Biology", "\u9ad8\u4e2d\u751f\u7269", "STEM"
],
"middle_school_mathematics": [
"Middle School Mathematics", "\u521d\u4e2d\u6570\u5b66", "STEM"
],
"middle_school_biology": [
"Middle School Biology", "\u521d\u4e2d\u751f\u7269", "STEM"
],
"middle_school_physics": [
"Middle School Physics", "\u521d\u4e2d\u7269\u7406", "STEM"
],
"middle_school_chemistry": [
"Middle School Chemistry", "\u521d\u4e2d\u5316\u5b66", "STEM"
],
"veterinary_medicine": [
"Veterinary Medicine", "\u517d\u533b\u5b66", "STEM"
],
"college_economics": [
"College Economics", "\u5927\u5b66\u7ecf\u6d4e\u5b66", "Social Science"
],
"business_administration": [
"Business Administration", "\u5de5\u5546\u7ba1\u7406", "Social Science"
],
"marxism": [
"Marxism", "\u9a6c\u514b\u601d\u4e3b\u4e49\u57fa\u672c\u539f\u7406",
"Social Science"
],
"mao_zedong_thought": [
"Mao Zedong Thought",
"\u6bdb\u6cfd\u4e1c\u601d\u60f3\u548c\u4e2d\u56fd\u7279\u8272\u793e\u4f1a\u4e3b\u4e49\u7406\u8bba\u4f53\u7cfb\u6982\u8bba",
"Social Science"
],
"education_science": [
"Education Science", "\u6559\u80b2\u5b66", "Social Science"
],
"teacher_qualification": [
"Teacher Qualification", "\u6559\u5e08\u8d44\u683c", "Social Science"
],
"high_school_politics": [
"High School Politics", "\u9ad8\u4e2d\u653f\u6cbb", "Social Science"
],
"high_school_geography": [
"High School Geography", "\u9ad8\u4e2d\u5730\u7406", "Social Science"
],
"middle_school_politics": [
"Middle School Politics", "\u521d\u4e2d\u653f\u6cbb", "Social Science"
],
"middle_school_geography": [
"Middle School Geography", "\u521d\u4e2d\u5730\u7406", "Social Science"
],
"modern_chinese_history":
["Modern Chinese History", "\u8fd1\u4ee3\u53f2\u7eb2\u8981", "Humanities"],
"ideological_and_moral_cultivation": [
"Ideological and Moral Cultivation",
"\u601d\u60f3\u9053\u5fb7\u4fee\u517b\u4e0e\u6cd5\u5f8b\u57fa\u7840",
"Humanities"
],
"logic": ["Logic", "\u903b\u8f91\u5b66", "Humanities"],
"law": ["Law", "\u6cd5\u5b66", "Humanities"],
"chinese_language_and_literature": [
"Chinese Language and Literature",
"\u4e2d\u56fd\u8bed\u8a00\u6587\u5b66", "Humanities"
],
"art_studies": ["Art Studies", "\u827a\u672f\u5b66", "Humanities"],
"professional_tour_guide": [
"Professional Tour Guide", "\u5bfc\u6e38\u8d44\u683c", "Humanities"
],
"legal_professional": [
"Legal Professional", "\u6cd5\u5f8b\u804c\u4e1a\u8d44\u683c",
"Humanities"
],
"high_school_chinese": [
"High School Chinese", "\u9ad8\u4e2d\u8bed\u6587", "Humanities"
],
"high_school_history": [
"High School History", "\u9ad8\u4e2d\u5386\u53f2", "Humanities"
],
"middle_school_history": [
"Middle School History", "\u521d\u4e2d\u5386\u53f2", "Humanities"
],
"civil_servant": ["Civil Servant", "\u516c\u52a1\u5458", "Other"],
"sports_science": ["Sports Science", "\u4f53\u80b2\u5b66", "Other"],
"plant_protection": [
"Plant Protection", "\u690d\u7269\u4fdd\u62a4", "Other"
],
"basic_medicine": ["Basic Medicine", "\u57fa\u7840\u533b\u5b66", "Other"],
"clinical_medicine": [
"Clinical Medicine", "\u4e34\u5e8a\u533b\u5b66", "Other"
],
"urban_and_rural_planner": [
"Urban and Rural Planner",
"\u6ce8\u518c\u57ce\u4e61\u89c4\u5212\u5e08", "Other"
],
"accountant": ["Accountant", "\u6ce8\u518c\u4f1a\u8ba1\u5e08", "Other"],
"fire_engineer": [
"Fire Engineer", "\u6ce8\u518c\u6d88\u9632\u5de5\u7a0b\u5e08", "Other"
],
"environmental_impact_assessment_engineer": [
"Environmental Impact Assessment Engineer",
"\u73af\u5883\u5f71\u54cd\u8bc4\u4ef7\u5de5\u7a0b\u5e08", "Other"
],
"tax_accountant": ["Tax Accountant", "\u7a0e\u52a1\u5e08", "Other"],
"physician": ["Physician", "\u533b\u5e08\u8d44\u683c", "Other"]
'computer_network': ['Computer Network', '计算机网络', 'STEM'],
'operating_system': ['Operating System', '操作系统', 'STEM'],
'computer_architecture': ['Computer Architecture', '计算机组成', 'STEM'],
'college_programming': ['College Programming', '大学编程', 'STEM'],
'college_physics': ['College Physics', '大学物理', 'STEM'],
'college_chemistry': ['College Chemistry', '大学化学', 'STEM'],
'advanced_mathematics': ['Advanced Mathematics', '高等数学', 'STEM'],
'probability_and_statistics': ['Probability and Statistics', '概率统计', 'STEM'],
'discrete_mathematics': ['Discrete Mathematics', '离散数学', 'STEM'],
'electrical_engineer': ['Electrical Engineer', '注册电气工程师', 'STEM'],
'metrology_engineer': ['Metrology Engineer', '注册计量师', 'STEM'],
'high_school_mathematics': ['High School Mathematics', '高中数学', 'STEM'],
'high_school_physics': ['High School Physics', '高中物理', 'STEM'],
'high_school_chemistry': ['High School Chemistry', '高中化学', 'STEM'],
'high_school_biology': ['High School Biology', '高中生物', 'STEM'],
'middle_school_mathematics': ['Middle School Mathematics', '初中数学', 'STEM'],
'middle_school_biology': ['Middle School Biology', '初中生物', 'STEM'],
'middle_school_physics': ['Middle School Physics', '初中物理', 'STEM'],
'middle_school_chemistry': ['Middle School Chemistry', '初中化学', 'STEM'],
'veterinary_medicine': ['Veterinary Medicine', '兽医学', 'STEM'],
'college_economics': ['College Economics', '大学经济学', 'Social Science'],
'business_administration': ['Business Administration', '工商管理', 'Social Science'],
'marxism': ['Marxism', '马克思主义基本原理', 'Social Science'],
'mao_zedong_thought': ['Mao Zedong Thought', '毛泽东思想和中国特色社会主义理论体系概论', 'Social Science'],
'education_science': ['Education Science', '教育学', 'Social Science'],
'teacher_qualification': ['Teacher Qualification', '教师资格', 'Social Science'],
'high_school_politics': ['High School Politics', '高中政治', 'Social Science'],
'high_school_geography': ['High School Geography', '高中地理', 'Social Science'],
'middle_school_politics': ['Middle School Politics', '初中政治', 'Social Science'],
'middle_school_geography': ['Middle School Geography', '初中地理', 'Social Science'],
'modern_chinese_history': ['Modern Chinese History', '近代史纲要', 'Humanities'],
'ideological_and_moral_cultivation': ['Ideological and Moral Cultivation', '思想道德修养与法律基础', 'Humanities'],
'logic': ['Logic', '逻辑学', 'Humanities'],
'law': ['Law', '法学', 'Humanities'],
'chinese_language_and_literature': ['Chinese Language and Literature', '中国语言文学', 'Humanities'],
'art_studies': ['Art Studies', '艺术学', 'Humanities'],
'professional_tour_guide': ['Professional Tour Guide', '导游资格', 'Humanities'],
'legal_professional': ['Legal Professional', '法律职业资格', 'Humanities'],
'high_school_chinese': ['High School Chinese', '高中语文', 'Humanities'],
'high_school_history': ['High School History', '高中历史', 'Humanities'],
'middle_school_history': ['Middle School History', '初中历史', 'Humanities'],
'civil_servant': ['Civil Servant', '公务员', 'Other'],
'sports_science': ['Sports Science', '体育学', 'Other'],
'plant_protection': ['Plant Protection', '植物保护', 'Other'],
'basic_medicine': ['Basic Medicine', '基础医学', 'Other'],
'clinical_medicine': ['Clinical Medicine', '临床医学', 'Other'],
'urban_and_rural_planner': ['Urban and Rural Planner', '注册城乡规划师', 'Other'],
'accountant': ['Accountant', '注册会计师', 'Other'],
'fire_engineer': ['Fire Engineer', '注册消防工程师', 'Other'],
'environmental_impact_assessment_engineer': ['Environmental Impact Assessment Engineer', '环境影响评价工程师', 'Other'],
'tax_accountant': ['Tax Accountant', '税务师', 'Other'],
'physician': ['Physician', '医师资格', 'Other'],
}
ceval_all_sets = list(ceval_subject_mapping.keys())

View File

@@ -5,139 +5,58 @@ from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CEvalDataset
ceval_subject_mapping = {
"computer_network":
["Computer Network", "\u8ba1\u7b97\u673a\u7f51\u7edc", "STEM"],
"operating_system":
["Operating System", "\u64cd\u4f5c\u7cfb\u7edf", "STEM"],
"computer_architecture":
["Computer Architecture", "\u8ba1\u7b97\u673a\u7ec4\u6210", "STEM"],
"college_programming":
["College Programming", "\u5927\u5b66\u7f16\u7a0b", "STEM"],
"college_physics": ["College Physics", "\u5927\u5b66\u7269\u7406", "STEM"],
"college_chemistry":
["College Chemistry", "\u5927\u5b66\u5316\u5b66", "STEM"],
"advanced_mathematics":
["Advanced Mathematics", "\u9ad8\u7b49\u6570\u5b66", "STEM"],
"probability_and_statistics":
["Probability and Statistics", "\u6982\u7387\u7edf\u8ba1", "STEM"],
"discrete_mathematics":
["Discrete Mathematics", "\u79bb\u6563\u6570\u5b66", "STEM"],
"electrical_engineer": [
"Electrical Engineer", "\u6ce8\u518c\u7535\u6c14\u5de5\u7a0b\u5e08",
"STEM"
],
"metrology_engineer":
["Metrology Engineer", "\u6ce8\u518c\u8ba1\u91cf\u5e08", "STEM"],
"high_school_mathematics":
["High School Mathematics", "\u9ad8\u4e2d\u6570\u5b66", "STEM"],
"high_school_physics":
["High School Physics", "\u9ad8\u4e2d\u7269\u7406", "STEM"],
"high_school_chemistry":
["High School Chemistry", "\u9ad8\u4e2d\u5316\u5b66", "STEM"],
"high_school_biology": [
"High School Biology", "\u9ad8\u4e2d\u751f\u7269", "STEM"
],
"middle_school_mathematics": [
"Middle School Mathematics", "\u521d\u4e2d\u6570\u5b66", "STEM"
],
"middle_school_biology": [
"Middle School Biology", "\u521d\u4e2d\u751f\u7269", "STEM"
],
"middle_school_physics": [
"Middle School Physics", "\u521d\u4e2d\u7269\u7406", "STEM"
],
"middle_school_chemistry": [
"Middle School Chemistry", "\u521d\u4e2d\u5316\u5b66", "STEM"
],
"veterinary_medicine": [
"Veterinary Medicine", "\u517d\u533b\u5b66", "STEM"
],
"college_economics": [
"College Economics", "\u5927\u5b66\u7ecf\u6d4e\u5b66", "Social Science"
],
"business_administration": [
"Business Administration", "\u5de5\u5546\u7ba1\u7406", "Social Science"
],
"marxism": [
"Marxism", "\u9a6c\u514b\u601d\u4e3b\u4e49\u57fa\u672c\u539f\u7406",
"Social Science"
],
"mao_zedong_thought": [
"Mao Zedong Thought",
"\u6bdb\u6cfd\u4e1c\u601d\u60f3\u548c\u4e2d\u56fd\u7279\u8272\u793e\u4f1a\u4e3b\u4e49\u7406\u8bba\u4f53\u7cfb\u6982\u8bba",
"Social Science"
],
"education_science": [
"Education Science", "\u6559\u80b2\u5b66", "Social Science"
],
"teacher_qualification": [
"Teacher Qualification", "\u6559\u5e08\u8d44\u683c", "Social Science"
],
"high_school_politics": [
"High School Politics", "\u9ad8\u4e2d\u653f\u6cbb", "Social Science"
],
"high_school_geography": [
"High School Geography", "\u9ad8\u4e2d\u5730\u7406", "Social Science"
],
"middle_school_politics": [
"Middle School Politics", "\u521d\u4e2d\u653f\u6cbb", "Social Science"
],
"middle_school_geography": [
"Middle School Geography", "\u521d\u4e2d\u5730\u7406", "Social Science"
],
"modern_chinese_history":
["Modern Chinese History", "\u8fd1\u4ee3\u53f2\u7eb2\u8981", "Humanities"],
"ideological_and_moral_cultivation": [
"Ideological and Moral Cultivation",
"\u601d\u60f3\u9053\u5fb7\u4fee\u517b\u4e0e\u6cd5\u5f8b\u57fa\u7840",
"Humanities"
],
"logic": ["Logic", "\u903b\u8f91\u5b66", "Humanities"],
"law": ["Law", "\u6cd5\u5b66", "Humanities"],
"chinese_language_and_literature": [
"Chinese Language and Literature",
"\u4e2d\u56fd\u8bed\u8a00\u6587\u5b66", "Humanities"
],
"art_studies": ["Art Studies", "\u827a\u672f\u5b66", "Humanities"],
"professional_tour_guide": [
"Professional Tour Guide", "\u5bfc\u6e38\u8d44\u683c", "Humanities"
],
"legal_professional": [
"Legal Professional", "\u6cd5\u5f8b\u804c\u4e1a\u8d44\u683c",
"Humanities"
],
"high_school_chinese": [
"High School Chinese", "\u9ad8\u4e2d\u8bed\u6587", "Humanities"
],
"high_school_history": [
"High School History", "\u9ad8\u4e2d\u5386\u53f2", "Humanities"
],
"middle_school_history": [
"Middle School History", "\u521d\u4e2d\u5386\u53f2", "Humanities"
],
"civil_servant": ["Civil Servant", "\u516c\u52a1\u5458", "Other"],
"sports_science": ["Sports Science", "\u4f53\u80b2\u5b66", "Other"],
"plant_protection": [
"Plant Protection", "\u690d\u7269\u4fdd\u62a4", "Other"
],
"basic_medicine": ["Basic Medicine", "\u57fa\u7840\u533b\u5b66", "Other"],
"clinical_medicine": [
"Clinical Medicine", "\u4e34\u5e8a\u533b\u5b66", "Other"
],
"urban_and_rural_planner": [
"Urban and Rural Planner",
"\u6ce8\u518c\u57ce\u4e61\u89c4\u5212\u5e08", "Other"
],
"accountant": ["Accountant", "\u6ce8\u518c\u4f1a\u8ba1\u5e08", "Other"],
"fire_engineer": [
"Fire Engineer", "\u6ce8\u518c\u6d88\u9632\u5de5\u7a0b\u5e08", "Other"
],
"environmental_impact_assessment_engineer": [
"Environmental Impact Assessment Engineer",
"\u73af\u5883\u5f71\u54cd\u8bc4\u4ef7\u5de5\u7a0b\u5e08", "Other"
],
"tax_accountant": ["Tax Accountant", "\u7a0e\u52a1\u5e08", "Other"],
"physician": ["Physician", "\u533b\u5e08\u8d44\u683c", "Other"]
'computer_network': ['Computer Network', '计算机网络', 'STEM'],
'operating_system': ['Operating System', '操作系统', 'STEM'],
'computer_architecture': ['Computer Architecture', '计算机组成', 'STEM'],
'college_programming': ['College Programming', '大学编程', 'STEM'],
'college_physics': ['College Physics', '大学物理', 'STEM'],
'college_chemistry': ['College Chemistry', '大学化学', 'STEM'],
'advanced_mathematics': ['Advanced Mathematics', '高等数学', 'STEM'],
'probability_and_statistics': ['Probability and Statistics', '概率统计', 'STEM'],
'discrete_mathematics': ['Discrete Mathematics', '离散数学', 'STEM'],
'electrical_engineer': ['Electrical Engineer', '注册电气工程师', 'STEM'],
'metrology_engineer': ['Metrology Engineer', '注册计量师', 'STEM'],
'high_school_mathematics': ['High School Mathematics', '高中数学', 'STEM'],
'high_school_physics': ['High School Physics', '高中物理', 'STEM'],
'high_school_chemistry': ['High School Chemistry', '高中化学', 'STEM'],
'high_school_biology': ['High School Biology', '高中生物', 'STEM'],
'middle_school_mathematics': ['Middle School Mathematics', '初中数学', 'STEM'],
'middle_school_biology': ['Middle School Biology', '初中生物', 'STEM'],
'middle_school_physics': ['Middle School Physics', '初中物理', 'STEM'],
'middle_school_chemistry': ['Middle School Chemistry', '初中化学', 'STEM'],
'veterinary_medicine': ['Veterinary Medicine', '兽医学', 'STEM'],
'college_economics': ['College Economics', '大学经济学', 'Social Science'],
'business_administration': ['Business Administration', '工商管理', 'Social Science'],
'marxism': ['Marxism', '马克思主义基本原理', 'Social Science'],
'mao_zedong_thought': ['Mao Zedong Thought', '毛泽东思想和中国特色社会主义理论体系概论', 'Social Science'],
'education_science': ['Education Science', '教育学', 'Social Science'],
'teacher_qualification': ['Teacher Qualification', '教师资格', 'Social Science'],
'high_school_politics': ['High School Politics', '高中政治', 'Social Science'],
'high_school_geography': ['High School Geography', '高中地理', 'Social Science'],
'middle_school_politics': ['Middle School Politics', '初中政治', 'Social Science'],
'middle_school_geography': ['Middle School Geography', '初中地理', 'Social Science'],
'modern_chinese_history': ['Modern Chinese History', '近代史纲要', 'Humanities'],
'ideological_and_moral_cultivation': ['Ideological and Moral Cultivation', '思想道德修养与法律基础', 'Humanities'],
'logic': ['Logic', '逻辑学', 'Humanities'],
'law': ['Law', '法学', 'Humanities'],
'chinese_language_and_literature': ['Chinese Language and Literature', '中国语言文学', 'Humanities'],
'art_studies': ['Art Studies', '艺术学', 'Humanities'],
'professional_tour_guide': ['Professional Tour Guide', '导游资格', 'Humanities'],
'legal_professional': ['Legal Professional', '法律职业资格', 'Humanities'],
'high_school_chinese': ['High School Chinese', '高中语文', 'Humanities'],
'high_school_history': ['High School History', '高中历史', 'Humanities'],
'middle_school_history': ['Middle School History', '初中历史', 'Humanities'],
'civil_servant': ['Civil Servant', '公务员', 'Other'],
'sports_science': ['Sports Science', '体育学', 'Other'],
'plant_protection': ['Plant Protection', '植物保护', 'Other'],
'basic_medicine': ['Basic Medicine', '基础医学', 'Other'],
'clinical_medicine': ['Clinical Medicine', '临床医学', 'Other'],
'urban_and_rural_planner': ['Urban and Rural Planner', '注册城乡规划师', 'Other'],
'accountant': ['Accountant', '注册会计师', 'Other'],
'fire_engineer': ['Fire Engineer', '注册消防工程师', 'Other'],
'environmental_impact_assessment_engineer': ['Environmental Impact Assessment Engineer', '环境影响评价工程师', 'Other'],
'tax_accountant': ['Tax Accountant', '税务师', 'Other'],
'physician': ['Physician', '医师资格', 'Other'],
}
ceval_all_sets = list(ceval_subject_mapping.keys())

View File

@@ -5,139 +5,58 @@ from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CEvalDataset
ceval_subject_mapping = {
"computer_network":
["Computer Network", "\u8ba1\u7b97\u673a\u7f51\u7edc", "STEM"],
"operating_system":
["Operating System", "\u64cd\u4f5c\u7cfb\u7edf", "STEM"],
"computer_architecture":
["Computer Architecture", "\u8ba1\u7b97\u673a\u7ec4\u6210", "STEM"],
"college_programming":
["College Programming", "\u5927\u5b66\u7f16\u7a0b", "STEM"],
"college_physics": ["College Physics", "\u5927\u5b66\u7269\u7406", "STEM"],
"college_chemistry":
["College Chemistry", "\u5927\u5b66\u5316\u5b66", "STEM"],
"advanced_mathematics":
["Advanced Mathematics", "\u9ad8\u7b49\u6570\u5b66", "STEM"],
"probability_and_statistics":
["Probability and Statistics", "\u6982\u7387\u7edf\u8ba1", "STEM"],
"discrete_mathematics":
["Discrete Mathematics", "\u79bb\u6563\u6570\u5b66", "STEM"],
"electrical_engineer": [
"Electrical Engineer", "\u6ce8\u518c\u7535\u6c14\u5de5\u7a0b\u5e08",
"STEM"
],
"metrology_engineer":
["Metrology Engineer", "\u6ce8\u518c\u8ba1\u91cf\u5e08", "STEM"],
"high_school_mathematics":
["High School Mathematics", "\u9ad8\u4e2d\u6570\u5b66", "STEM"],
"high_school_physics":
["High School Physics", "\u9ad8\u4e2d\u7269\u7406", "STEM"],
"high_school_chemistry":
["High School Chemistry", "\u9ad8\u4e2d\u5316\u5b66", "STEM"],
"high_school_biology": [
"High School Biology", "\u9ad8\u4e2d\u751f\u7269", "STEM"
],
"middle_school_mathematics": [
"Middle School Mathematics", "\u521d\u4e2d\u6570\u5b66", "STEM"
],
"middle_school_biology": [
"Middle School Biology", "\u521d\u4e2d\u751f\u7269", "STEM"
],
"middle_school_physics": [
"Middle School Physics", "\u521d\u4e2d\u7269\u7406", "STEM"
],
"middle_school_chemistry": [
"Middle School Chemistry", "\u521d\u4e2d\u5316\u5b66", "STEM"
],
"veterinary_medicine": [
"Veterinary Medicine", "\u517d\u533b\u5b66", "STEM"
],
"college_economics": [
"College Economics", "\u5927\u5b66\u7ecf\u6d4e\u5b66", "Social Science"
],
"business_administration": [
"Business Administration", "\u5de5\u5546\u7ba1\u7406", "Social Science"
],
"marxism": [
"Marxism", "\u9a6c\u514b\u601d\u4e3b\u4e49\u57fa\u672c\u539f\u7406",
"Social Science"
],
"mao_zedong_thought": [
"Mao Zedong Thought",
"\u6bdb\u6cfd\u4e1c\u601d\u60f3\u548c\u4e2d\u56fd\u7279\u8272\u793e\u4f1a\u4e3b\u4e49\u7406\u8bba\u4f53\u7cfb\u6982\u8bba",
"Social Science"
],
"education_science": [
"Education Science", "\u6559\u80b2\u5b66", "Social Science"
],
"teacher_qualification": [
"Teacher Qualification", "\u6559\u5e08\u8d44\u683c", "Social Science"
],
"high_school_politics": [
"High School Politics", "\u9ad8\u4e2d\u653f\u6cbb", "Social Science"
],
"high_school_geography": [
"High School Geography", "\u9ad8\u4e2d\u5730\u7406", "Social Science"
],
"middle_school_politics": [
"Middle School Politics", "\u521d\u4e2d\u653f\u6cbb", "Social Science"
],
"middle_school_geography": [
"Middle School Geography", "\u521d\u4e2d\u5730\u7406", "Social Science"
],
"modern_chinese_history":
["Modern Chinese History", "\u8fd1\u4ee3\u53f2\u7eb2\u8981", "Humanities"],
"ideological_and_moral_cultivation": [
"Ideological and Moral Cultivation",
"\u601d\u60f3\u9053\u5fb7\u4fee\u517b\u4e0e\u6cd5\u5f8b\u57fa\u7840",
"Humanities"
],
"logic": ["Logic", "\u903b\u8f91\u5b66", "Humanities"],
"law": ["Law", "\u6cd5\u5b66", "Humanities"],
"chinese_language_and_literature": [
"Chinese Language and Literature",
"\u4e2d\u56fd\u8bed\u8a00\u6587\u5b66", "Humanities"
],
"art_studies": ["Art Studies", "\u827a\u672f\u5b66", "Humanities"],
"professional_tour_guide": [
"Professional Tour Guide", "\u5bfc\u6e38\u8d44\u683c", "Humanities"
],
"legal_professional": [
"Legal Professional", "\u6cd5\u5f8b\u804c\u4e1a\u8d44\u683c",
"Humanities"
],
"high_school_chinese": [
"High School Chinese", "\u9ad8\u4e2d\u8bed\u6587", "Humanities"
],
"high_school_history": [
"High School History", "\u9ad8\u4e2d\u5386\u53f2", "Humanities"
],
"middle_school_history": [
"Middle School History", "\u521d\u4e2d\u5386\u53f2", "Humanities"
],
"civil_servant": ["Civil Servant", "\u516c\u52a1\u5458", "Other"],
"sports_science": ["Sports Science", "\u4f53\u80b2\u5b66", "Other"],
"plant_protection": [
"Plant Protection", "\u690d\u7269\u4fdd\u62a4", "Other"
],
"basic_medicine": ["Basic Medicine", "\u57fa\u7840\u533b\u5b66", "Other"],
"clinical_medicine": [
"Clinical Medicine", "\u4e34\u5e8a\u533b\u5b66", "Other"
],
"urban_and_rural_planner": [
"Urban and Rural Planner",
"\u6ce8\u518c\u57ce\u4e61\u89c4\u5212\u5e08", "Other"
],
"accountant": ["Accountant", "\u6ce8\u518c\u4f1a\u8ba1\u5e08", "Other"],
"fire_engineer": [
"Fire Engineer", "\u6ce8\u518c\u6d88\u9632\u5de5\u7a0b\u5e08", "Other"
],
"environmental_impact_assessment_engineer": [
"Environmental Impact Assessment Engineer",
"\u73af\u5883\u5f71\u54cd\u8bc4\u4ef7\u5de5\u7a0b\u5e08", "Other"
],
"tax_accountant": ["Tax Accountant", "\u7a0e\u52a1\u5e08", "Other"],
"physician": ["Physician", "\u533b\u5e08\u8d44\u683c", "Other"]
'computer_network': ['Computer Network', '计算机网络', 'STEM'],
'operating_system': ['Operating System', '操作系统', 'STEM'],
'computer_architecture': ['Computer Architecture', '计算机组成', 'STEM'],
'college_programming': ['College Programming', '大学编程', 'STEM'],
'college_physics': ['College Physics', '大学物理', 'STEM'],
'college_chemistry': ['College Chemistry', '大学化学', 'STEM'],
'advanced_mathematics': ['Advanced Mathematics', '高等数学', 'STEM'],
'probability_and_statistics': ['Probability and Statistics', '概率统计', 'STEM'],
'discrete_mathematics': ['Discrete Mathematics', '离散数学', 'STEM'],
'electrical_engineer': ['Electrical Engineer', '注册电气工程师', 'STEM'],
'metrology_engineer': ['Metrology Engineer', '注册计量师', 'STEM'],
'high_school_mathematics': ['High School Mathematics', '高中数学', 'STEM'],
'high_school_physics': ['High School Physics', '高中物理', 'STEM'],
'high_school_chemistry': ['High School Chemistry', '高中化学', 'STEM'],
'high_school_biology': ['High School Biology', '高中生物', 'STEM'],
'middle_school_mathematics': ['Middle School Mathematics', '初中数学', 'STEM'],
'middle_school_biology': ['Middle School Biology', '初中生物', 'STEM'],
'middle_school_physics': ['Middle School Physics', '初中物理', 'STEM'],
'middle_school_chemistry': ['Middle School Chemistry', '初中化学', 'STEM'],
'veterinary_medicine': ['Veterinary Medicine', '兽医学', 'STEM'],
'college_economics': ['College Economics', '大学经济学', 'Social Science'],
'business_administration': ['Business Administration', '工商管理', 'Social Science'],
'marxism': ['Marxism', '马克思主义基本原理', 'Social Science'],
'mao_zedong_thought': ['Mao Zedong Thought', '毛泽东思想和中国特色社会主义理论体系概论', 'Social Science'],
'education_science': ['Education Science', '教育学', 'Social Science'],
'teacher_qualification': ['Teacher Qualification', '教师资格', 'Social Science'],
'high_school_politics': ['High School Politics', '高中政治', 'Social Science'],
'high_school_geography': ['High School Geography', '高中地理', 'Social Science'],
'middle_school_politics': ['Middle School Politics', '初中政治', 'Social Science'],
'middle_school_geography': ['Middle School Geography', '初中地理', 'Social Science'],
'modern_chinese_history': ['Modern Chinese History', '近代史纲要', 'Humanities'],
'ideological_and_moral_cultivation': ['Ideological and Moral Cultivation', '思想道德修养与法律基础', 'Humanities'],
'logic': ['Logic', '逻辑学', 'Humanities'],
'law': ['Law', '法学', 'Humanities'],
'chinese_language_and_literature': ['Chinese Language and Literature', '中国语言文学', 'Humanities'],
'art_studies': ['Art Studies', '艺术学', 'Humanities'],
'professional_tour_guide': ['Professional Tour Guide', '导游资格', 'Humanities'],
'legal_professional': ['Legal Professional', '法律职业资格', 'Humanities'],
'high_school_chinese': ['High School Chinese', '高中语文', 'Humanities'],
'high_school_history': ['High School History', '高中历史', 'Humanities'],
'middle_school_history': ['Middle School History', '初中历史', 'Humanities'],
'civil_servant': ['Civil Servant', '公务员', 'Other'],
'sports_science': ['Sports Science', '体育学', 'Other'],
'plant_protection': ['Plant Protection', '植物保护', 'Other'],
'basic_medicine': ['Basic Medicine', '基础医学', 'Other'],
'clinical_medicine': ['Clinical Medicine', '临床医学', 'Other'],
'urban_and_rural_planner': ['Urban and Rural Planner', '注册城乡规划师', 'Other'],
'accountant': ['Accountant', '注册会计师', 'Other'],
'fire_engineer': ['Fire Engineer', '注册消防工程师', 'Other'],
'environmental_impact_assessment_engineer': ['Environmental Impact Assessment Engineer', '环境影响评价工程师', 'Other'],
'tax_accountant': ['Tax Accountant', '税务师', 'Other'],
'physician': ['Physician', '医师资格', 'Other'],
}
ceval_all_sets = list(ceval_subject_mapping.keys())

View File

@@ -0,0 +1,105 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever, ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CEvalDataset
from opencompass.utils.text_postprocessors import first_option_postprocess
ceval_subject_mapping = {
'computer_network': ['Computer Network', '计算机网络', 'STEM'],
'operating_system': ['Operating System', '操作系统', 'STEM'],
'computer_architecture': ['Computer Architecture', '计算机组成', 'STEM'],
'college_programming': ['College Programming', '大学编程', 'STEM'],
'college_physics': ['College Physics', '大学物理', 'STEM'],
'college_chemistry': ['College Chemistry', '大学化学', 'STEM'],
'advanced_mathematics': ['Advanced Mathematics', '高等数学', 'STEM'],
'probability_and_statistics': ['Probability and Statistics', '概率统计', 'STEM'],
'discrete_mathematics': ['Discrete Mathematics', '离散数学', 'STEM'],
'electrical_engineer': ['Electrical Engineer', '注册电气工程师', 'STEM'],
'metrology_engineer': ['Metrology Engineer', '注册计量师', 'STEM'],
'high_school_mathematics': ['High School Mathematics', '高中数学', 'STEM'],
'high_school_physics': ['High School Physics', '高中物理', 'STEM'],
'high_school_chemistry': ['High School Chemistry', '高中化学', 'STEM'],
'high_school_biology': ['High School Biology', '高中生物', 'STEM'],
'middle_school_mathematics': ['Middle School Mathematics', '初中数学', 'STEM'],
'middle_school_biology': ['Middle School Biology', '初中生物', 'STEM'],
'middle_school_physics': ['Middle School Physics', '初中物理', 'STEM'],
'middle_school_chemistry': ['Middle School Chemistry', '初中化学', 'STEM'],
'veterinary_medicine': ['Veterinary Medicine', '兽医学', 'STEM'],
'college_economics': ['College Economics', '大学经济学', 'Social Science'],
'business_administration': ['Business Administration', '工商管理', 'Social Science'],
'marxism': ['Marxism', '马克思主义基本原理', 'Social Science'],
'mao_zedong_thought': ['Mao Zedong Thought', '毛泽东思想和中国特色社会主义理论体系概论', 'Social Science'],
'education_science': ['Education Science', '教育学', 'Social Science'],
'teacher_qualification': ['Teacher Qualification', '教师资格', 'Social Science'],
'high_school_politics': ['High School Politics', '高中政治', 'Social Science'],
'high_school_geography': ['High School Geography', '高中地理', 'Social Science'],
'middle_school_politics': ['Middle School Politics', '初中政治', 'Social Science'],
'middle_school_geography': ['Middle School Geography', '初中地理', 'Social Science'],
'modern_chinese_history': ['Modern Chinese History', '近代史纲要', 'Humanities'],
'ideological_and_moral_cultivation': ['Ideological and Moral Cultivation', '思想道德修养与法律基础', 'Humanities'],
'logic': ['Logic', '逻辑学', 'Humanities'],
'law': ['Law', '法学', 'Humanities'],
'chinese_language_and_literature': ['Chinese Language and Literature', '中国语言文学', 'Humanities'],
'art_studies': ['Art Studies', '艺术学', 'Humanities'],
'professional_tour_guide': ['Professional Tour Guide', '导游资格', 'Humanities'],
'legal_professional': ['Legal Professional', '法律职业资格', 'Humanities'],
'high_school_chinese': ['High School Chinese', '高中语文', 'Humanities'],
'high_school_history': ['High School History', '高中历史', 'Humanities'],
'middle_school_history': ['Middle School History', '初中历史', 'Humanities'],
'civil_servant': ['Civil Servant', '公务员', 'Other'],
'sports_science': ['Sports Science', '体育学', 'Other'],
'plant_protection': ['Plant Protection', '植物保护', 'Other'],
'basic_medicine': ['Basic Medicine', '基础医学', 'Other'],
'clinical_medicine': ['Clinical Medicine', '临床医学', 'Other'],
'urban_and_rural_planner': ['Urban and Rural Planner', '注册城乡规划师', 'Other'],
'accountant': ['Accountant', '注册会计师', 'Other'],
'fire_engineer': ['Fire Engineer', '注册消防工程师', 'Other'],
'environmental_impact_assessment_engineer': ['Environmental Impact Assessment Engineer', '环境影响评价工程师', 'Other'],
'tax_accountant': ['Tax Accountant', '税务师', 'Other'],
'physician': ['Physician', '医师资格', 'Other'],
}
ceval_all_sets = list(ceval_subject_mapping.keys())
ceval_datasets = []
for _split in ["val"]:
for _name in ceval_all_sets:
_ch_name = ceval_subject_mapping[_name][1]
ceval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(
begin="</E>",
round=[
dict(
role="HUMAN",
prompt=
f"以下是中国关于{_ch_name}考试的单项选择题,请选出其中的正确答案。\n{{question}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n让我们一步一步思考。答案: "
),
dict(role="BOT", prompt="{answer}"),
]),
ice_token="</E>",
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=256),
)
ceval_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'))
ceval_datasets.append(
dict(
type=CEvalDataset,
path="./data/ceval/formal_ceval",
name=_name,
abbr="ceval-" + _name if _split == "val" else "ceval-test-" +
_name,
reader_cfg=dict(
input_columns=["question", "A", "B", "C", "D"],
output_column="answer",
train_split="dev",
test_split=_split),
infer_cfg=ceval_infer_cfg,
eval_cfg=ceval_eval_cfg,
))

View File

@@ -0,0 +1,41 @@
# Use FixKRetriever to avoid the hang caused by the HuggingFace dataset loading
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import commonsenseqaDataset
commonsenseqa_reader_cfg = dict(
input_columns=['question', 'A', 'B', 'C', 'D', 'E'],
output_column='answerKey',
test_split='validation')
_ice_template = dict(
type=PromptTemplate,
template={
ans: dict(
begin='</E>',
round=[
dict(role="HUMAN", prompt="Question: {question}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nE. {E}\nAnswer: "),
dict(role="BOT", prompt=f"{ans}"),
])
for ans in ['A', 'B', 'C', 'D', 'E']
},
ice_token='</E>')
commonsenseqa_infer_cfg = dict(
ice_template=_ice_template,
retriever=dict(type=FixKRetriever, fix_id_list=[0, 1, 2, 3, 4, 5, 6, 7]),
inferencer=dict(type=PPLInferencer))
commonsenseqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
commonsenseqa_datasets = [
dict(
abbr='commonsense_qa',
type=commonsenseqaDataset,
path='./data/commonsenseqa',
reader_cfg=commonsenseqa_reader_cfg,
infer_cfg=commonsenseqa_infer_cfg,
eval_cfg=commonsenseqa_eval_cfg)
]
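Because the template above maps every answer label to its own filled-in prompt, the PPLInferencer scores one completion per label and keeps the most probable one. A minimal illustrative sketch of that selection step (not OpenCompass's actual implementation):

def pick_answer(ppl_by_label: dict) -> str:
    # The label whose completed prompt has the lowest perplexity wins.
    return min(ppl_by_label, key=ppl_by_label.get)

assert pick_answer({'A': 5.1, 'B': 3.2, 'C': 6.0, 'D': 4.4, 'E': 7.3}) == 'B'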

View File

@@ -37,7 +37,7 @@ ds1000_datasets = [
dict(
abbr=f"ds1000_{lib}",
type=DS1000Dataset,
path="ds1000_data/",
path="./data/ds1000_data/",
libs=f"{lib}",
reader_cfg=ds1000_reader_cfg,
infer_cfg=ds1000_infer_cfg,
@@ -55,7 +55,7 @@ ds1000_datasets.append(
dict(
abbr="ds1000_Matplotlib",
type=DS1000Dataset,
path="ds1000_data/",
path="./data/ds1000_data/",
libs="Matplotlib",
reader_cfg=ds1000_reader_cfg,
infer_cfg=ds1000_infer_cfg,

View File

@@ -0,0 +1,67 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import DS1000Dataset, DS1000ServiceEvaluator
ds1000_reader_cfg = dict(
input_columns=["prompt"],
output_column="test_column",
train_split='test',
test_split='test')
ds1000_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt="{prompt}",
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
ds1000_eval_cfg_dict = {
lib: dict(
evaluator=dict(
type=DS1000ServiceEvaluator,
lib=lib,
ip_address="localhost",  # replace with your code_eval_server IP address and port
port=5000
),
pred_role="BOT")
for lib in [
'Pandas',
'Numpy',
'Tensorflow',
'Scipy',
'Sklearn',
'Pytorch',
'Matplotlib',
]
}
# The DS-1000 dataset can be downloaded from
# https://github.com/HKUNLP/DS-1000/blob/main/ds1000_data.zip
ds1000_datasets = [
dict(
abbr=f"ds1000_{lib}",
type=DS1000Dataset,
path="./data/ds1000_data/",
libs=f"{lib}",
reader_cfg=ds1000_reader_cfg,
infer_cfg=ds1000_infer_cfg,
eval_cfg=ds1000_eval_cfg_dict[lib],
) for lib in [
'Pandas',
'Numpy',
'Tensorflow',
'Scipy',
'Sklearn',
'Pytorch',
'Matplotlib',
]
]
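The DS1000ServiceEvaluator configured above (its implementation is diffed later in this commit) uploads the predictions as a JSON file to ip_address:port/evaluate via curl. An equivalent standalone call, assuming the code eval service from the linked guide is running and 'predictions.json' is a placeholder file name:

import subprocess

result = subprocess.run(
    ['curl', '-X', 'POST', '-F', 'file=@predictions.json',
     'localhost:5000/evaluate'],
    timeout=180, capture_output=True)
print(result.stdout.decode('utf-8'))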

View File

@@ -0,0 +1,33 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import hellaswagDataset_V2
hellaswag_reader_cfg = dict(
input_columns=['query', 'A', 'B', 'C', 'D'],
output_column='label')
hellaswag_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
ans: dict(round=[
dict(role="HUMAN", prompt="{ctx}\nQuestion: Which ending makes the most sense?\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer: "),
dict(role="BOT", prompt=f"{ans}"),
]) for ans in ['A', 'B', 'C', 'D']
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
hellaswag_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
hellaswag_datasets = [
dict(
abbr='hellaswag',
type=hellaswagDataset_V2,
path='./data/hellaswag/hellaswag.jsonl',
reader_cfg=hellaswag_reader_cfg,
infer_cfg=hellaswag_infer_cfg,
eval_cfg=hellaswag_eval_cfg)
]

View File

@@ -0,0 +1,123 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever, ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import MMLUDataset
from opencompass.utils.text_postprocessors import first_option_postprocess
# None of the MMLU datasets on HuggingFace are parsed correctly, so we use our own dataset reader
# Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar
mmlu_reader_cfg = dict(
input_columns=["input", "A", "B", "C", "D"],
output_column="target",
train_split='dev')
mmlu_all_sets = [
"college_biology",
"college_chemistry",
"college_computer_science",
"college_mathematics",
"college_physics",
"electrical_engineering",
"astronomy",
"anatomy",
"abstract_algebra",
"machine_learning",
"clinical_knowledge",
"global_facts",
"management",
"nutrition",
"marketing",
"professional_accounting",
"high_school_geography",
"international_law",
"moral_scenarios",
"computer_security",
"high_school_microeconomics",
"professional_law",
"medical_genetics",
"professional_psychology",
"jurisprudence",
"world_religions",
"philosophy",
"virology",
"high_school_chemistry",
"public_relations",
"high_school_macroeconomics",
"human_sexuality",
"elementary_mathematics",
"high_school_physics",
"high_school_computer_science",
"high_school_european_history",
"business_ethics",
"moral_disputes",
"high_school_statistics",
"miscellaneous",
"formal_logic",
"high_school_government_and_politics",
"prehistory",
"security_studies",
"high_school_biology",
"logical_fallacies",
"high_school_world_history",
"professional_medicine",
"high_school_mathematics",
"college_medicine",
"high_school_us_history",
"sociology",
"econometrics",
"high_school_psychology",
"human_aging",
"us_foreign_policy",
"conceptual_physics",
]
mmlu_datasets = []
for _name in mmlu_all_sets:
_hint = f'There is a single choice question about {_name.replace("_", " ")}. Answer the question by replying A, B, C or D.'
mmlu_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
f"{_hint}\nQ: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nA: "
),
dict(role="BOT", prompt="{target}\n")
]),
),
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin="</E>",
round=[
dict(
role="HUMAN",
prompt=
f"{_hint}\nQ: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nLet's think step by step. A: "
),
],
),
ice_token="</E>",
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=256),
)
mmlu_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'))
mmlu_datasets.append(
dict(
abbr=f"lukaemon_mmlu_{_name}",
type=MMLUDataset,
path="./data/mmlu/",
name=_name,
reader_cfg=mmlu_reader_cfg,
infer_cfg=mmlu_infer_cfg,
eval_cfg=mmlu_eval_cfg,
))
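first_option_postprocess extracts the predicted option letter from the model's free-form chain-of-thought output before accuracy is computed. An illustrative stand-in for that behavior (not the actual OpenCompass implementation):

import re

def first_option(text: str, options: str = 'ABCD') -> str:
    # Return the first occurrence of any option letter in the generation.
    match = re.search(f'[{options}]', text)
    return match.group(0) if match else ''

assert first_option("Let's think step by step. The answer is B.") == 'B'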

View File

@@ -0,0 +1,24 @@
from opencompass.models import HuggingFaceCausalLM
models = [
dict(
type=HuggingFaceCausalLM,
abbr='bluelm-7b-base-hf',
path="vivo-ai/BlueLM-7B-Base",
tokenizer_path='vivo-ai/BlueLM-7B-Base',
model_kwargs=dict(
device_map='auto',
trust_remote_code=True,
),
tokenizer_kwargs=dict(
padding_side='left',
truncation_side='left',
trust_remote_code=True,
use_fast=False,
),
max_out_len=100,
max_seq_len=2048,
batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1),
)
]

View File

@@ -0,0 +1,24 @@
from opencompass.models import HuggingFaceCausalLM
models = [
dict(
type=HuggingFaceCausalLM,
abbr='bluelm-7b-base-32k-hf',
path="vivo-ai/BlueLM-7B-Base-32K",
tokenizer_path='vivo-ai/BlueLM-7B-Base-32K',
model_kwargs=dict(
device_map='auto',
trust_remote_code=True,
),
tokenizer_kwargs=dict(
padding_side='left',
truncation_side='left',
trust_remote_code=True,
use_fast=False,
),
max_out_len=100,
max_seq_len=4096,
batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1),
)
]

View File

@@ -0,0 +1,32 @@
from opencompass.models import HuggingFaceCausalLM
_meta_template = dict(
round=[
dict(role='HUMAN', begin='[|Human|]:'),
dict(role='BOT', begin='[|AI|]:', generate=True),
],
)
models = [
dict(
type=HuggingFaceCausalLM,
abbr='bluelm-7b-chat-hf',
path="vivo-ai/BlueLM-7B-Chat",
tokenizer_path='vivo-ai/BlueLM-7B-Chat',
model_kwargs=dict(
device_map='auto',
trust_remote_code=True,
),
tokenizer_kwargs=dict(
padding_side='left',
truncation_side='left',
trust_remote_code=True,
use_fast=False,
),
meta_template=_meta_template,
max_out_len=100,
max_seq_len=2048,
batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1),
)
]
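The _meta_template determines how dialogue rounds are serialized into a single prompt string, with the generate=True BOT turn left open for the model to complete. A rough illustrative rendering under the template above (not OpenCompass's exact serializer):

def render(rounds):
    tags = {'HUMAN': '[|Human|]:', 'BOT': '[|AI|]:'}
    # Each turn is prefixed with its role tag; the final BOT turn stays empty.
    return ''.join(tags[role] + text for role, text in rounds)

print(render([('HUMAN', '你好'), ('BOT', '')]))  # -> [|Human|]:你好[|AI|]: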

View File

@@ -0,0 +1,32 @@
from opencompass.models import HuggingFaceCausalLM
_meta_template = dict(
round=[
dict(role='HUMAN', begin='[|Human|]:'),
dict(role='BOT', begin='[|AI|]:', generate=True),
],
)
models = [
dict(
type=HuggingFaceCausalLM,
abbr='bluelm-7b-chat-32k-hf',
path="vivo-ai/BlueLM-7B-Chat-32K",
tokenizer_path='vivo-ai/BlueLM-7B-Chat-32K',
model_kwargs=dict(
device_map='auto',
trust_remote_code=True,
),
tokenizer_kwargs=dict(
padding_side='left',
truncation_side='left',
trust_remote_code=True,
use_fast=False,
),
meta_template=_meta_template,
max_out_len=100,
max_seq_len=4096,
batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1),
)
]

View File

@@ -0,0 +1,33 @@
from opencompass.models import HuggingFaceCausalLM
_meta_template = dict(
round=[
dict(role='HUMAN', begin='', end=''),
dict(role='BOT', begin='', end='\n\n', generate=True),
],
)
models = [
dict(
abbr='nanbeige-16b-base-hf',
type=HuggingFaceCausalLM,
path='Nanbeige/Nanbeige-16B-Base',
tokenizer_path='Nanbeige/Nanbeige-16B-Base',
model_kwargs=dict(
device_map='auto',
trust_remote_code=True,
torch_dtype='auto',
),
tokenizer_kwargs=dict(
padding_side='right',
truncation_side='left',
trust_remote_code=True
),
meta_template=_meta_template,
batch_padding=False,
max_out_len=1024,
max_seq_len=4096,
batch_size=1,
run_cfg=dict(num_gpus=1, num_procs=1),
)
]

View File

@@ -0,0 +1,34 @@
from opencompass.models import HuggingFaceCausalLM
_meta_template = dict(
round=[
dict(role='HUMAN', begin='', end=''),
dict(role='BOT', begin='', end='\n\n', generate=True),
],
)
models = [
dict(
type=HuggingFaceCausalLM,
abbr='nanbeige-16b-base-32k-hf',
path="Nanbeige/Nanbeige-16B-Base-32K",
tokenizer_path='Nanbeige/Nanbeige-16B-Base-32K',
model_kwargs=dict(
device_map='auto',
trust_remote_code=True,
torch_dtype='auto',
),
tokenizer_kwargs=dict(
padding_side='right',
truncation_side='left',
trust_remote_code=True,
use_fast=False,
),
meta_template=_meta_template,
batch_padding=False,
max_out_len=1024,
max_seq_len=8192,
batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1),
)
]

View File

@@ -0,0 +1,34 @@
from opencompass.models import HuggingFaceCausalLM
_meta_template = dict(
round=[
dict(role='HUMAN', begin='### Human: \n', end='\n\n'),
dict(role='BOT', begin='### Assistant: ', end='</s>', generate=True),
],
)
models = [
dict(
type=HuggingFaceCausalLM,
abbr='nanbeige-16b-chat-hf',
path="Nanbeige/Nanbeige-16B-Chat",
tokenizer_path='Nanbeige/Nanbeige-16B-Chat',
model_kwargs=dict(
device_map='auto',
trust_remote_code=True,
torch_dtype='auto',
),
tokenizer_kwargs=dict(
padding_side='right',
truncation_side='left',
trust_remote_code=True,
use_fast=False,
),
meta_template=_meta_template,
batch_padding=False,
max_out_len=1024,
max_seq_len=4096,
batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1),
)
]

View File

@@ -0,0 +1,34 @@
from opencompass.models import HuggingFaceCausalLM
_meta_template = dict(
round=[
dict(role='HUMAN', begin='### Human: \n', end='\n\n'),
dict(role='BOT', begin='### Assistant: ', end='</s>', generate=True),
],
)
models = [
dict(
type=HuggingFaceCausalLM,
abbr='nanbeige-16b-chat-32k-hf',
path="Nanbeige/Nanbeige-16B-Chat-32K",
tokenizer_path='Nanbeige/Nanbeige-16B-Chat-32K',
model_kwargs=dict(
device_map='auto',
trust_remote_code=True,
torch_dtype='auto',
),
tokenizer_kwargs=dict(
padding_side='right',
truncation_side='left',
trust_remote_code=True,
use_fast=False,
),
meta_template=_meta_template,
batch_padding=False,
max_out_len=1024,
max_seq_len=8192,
batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1),
)
]

View File

@@ -0,0 +1,33 @@
from opencompass.models import HuggingFaceCausalLM
_meta_template = dict(
round=[
dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'),
dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True),
],
eos_token_id=2
)
models = [
dict(
abbr='dolphin-2.2.1-mistral-7b-hf',
type=HuggingFaceCausalLM,
path='ehartford/dolphin-2.2.1-mistral-7b',
tokenizer_path='ehartford/dolphin-2.2.1-mistral-7b',
model_kwargs=dict(
device_map='auto',
trust_remote_code=True,
),
tokenizer_kwargs=dict(
padding_side='left',
truncation_side='left',
trust_remote_code=True,
),
meta_template=_meta_template,
max_out_len=100,
max_seq_len=2048,
batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1),
)
]

View File

@@ -0,0 +1,33 @@
from opencompass.models import HuggingFaceCausalLM
_meta_template = dict(
round=[
dict(role="HUMAN", begin='### User:\n', end='\n'),
dict(role="BOT", begin="### Assistant:\n", generate=True),
],
eos_token_id=2
)
models = [
dict(
abbr='fashiongpt-70b-v11-hf',
type=HuggingFaceCausalLM,
path='ICBU-NPU/FashionGPT-70B-V1.1',
tokenizer_path='ICBU-NPU/FashionGPT-70B-V1.1',
model_kwargs=dict(
device_map='auto',
trust_remote_code=True,
),
tokenizer_kwargs=dict(
padding_side='left',
truncation_side='left',
trust_remote_code=True,
),
meta_template=_meta_template,
max_out_len=100,
max_seq_len=2048,
batch_size=8,
run_cfg=dict(num_gpus=8, num_procs=1),
)
]

View File

@@ -0,0 +1,34 @@
from opencompass.models import HuggingFaceCausalLM
_meta_template = dict(
begin='<|startoftext|>',
round=[
dict(role="HUMAN", begin='Human: ', end='\n\n'),
dict(role="BOT", begin="Assistant: <|endoftext|>", end='<|endoftext|>', generate=True),
],
eos_token_id=2
)
models = [
dict(
abbr='orionstar-yi-34b-chat-hf',
type=HuggingFaceCausalLM,
path='OrionStarAI/OrionStar-Yi-34B-Chat',
tokenizer_path='OrionStarAI/OrionStar-Yi-34B-Chat',
model_kwargs=dict(
device_map='auto',
trust_remote_code=True,
),
tokenizer_kwargs=dict(
padding_side='left',
truncation_side='left',
trust_remote_code=True,
),
meta_template=_meta_template,
max_out_len=100,
max_seq_len=2048,
batch_size=8,
run_cfg=dict(num_gpus=4, num_procs=1),
)
]

View File

@@ -0,0 +1,5 @@
ds1000_summary_groups = []
_ds1000_all = ['Pandas', 'Numpy', 'Tensorflow', 'Scipy', 'Sklearn', 'Pytorch', 'Matplotlib']
_ds1000_all = ['ds1000_' + d for d in _ds1000_all]
ds1000_summary_groups.append({'name': 'ds1000', 'subsets': _ds1000_all})

View File

@@ -1,15 +1,19 @@
import configparser
import importlib
import json
import os
import os.path as osp
import pickle
import re
import shutil
import signal
import subprocess
import sys
import tempfile
import threading
from concurrent.futures import ProcessPoolExecutor
from pathlib import Path
from shutil import copyfile
from subprocess import PIPE, Popen
from typing import Optional, Union
@@ -20,6 +24,11 @@ from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS
from .base import BaseDataset
_LIBRARY_NAME_LIST = [
'Pandas', 'Numpy', 'Tensorflow', 'Scipy', 'Sklearn', 'Pytorch',
'Matplotlib'
]
@LOAD_DATASET.register_module()
class DS1000Dataset(BaseDataset):
@@ -323,3 +332,98 @@ def import_source_file(fname, modname):
except FileNotFoundError as e:
raise ImportError(f'{e.strerror}: {fname}') from e
return module
class DS1000ServiceEvaluator(BaseEvaluator):
"""Evaluator for ds1000 eval by using a service.
Before you use this Evaluator, launch a code eval service according to:
https://opencompass.readthedocs.io/en/latest/advanced_guides/code_eval_service.html
Args:
lib (str): The library to be evaluated.
ip_address (str): The IP address of the DS1000 code evaluation service.
Defaults to 'localhost'.
port (int): The port of the DS1000 code evaluation service.
Defaults to 5000.
timeout (int): Maximum wait time when accessing the service.
Defaults to 180.
"""
def __init__(self,
lib: str,
ip_address='localhost',
port=5000,
timeout=180) -> None:
assert lib in _LIBRARY_NAME_LIST, (
f' lib must be in {_LIBRARY_NAME_LIST}')
self.lib = lib
self.ip_address = ip_address
self.port = port
self.timeout = timeout
super().__init__()
def score(self, predictions, references):
processed_predictions = {}
assert len(predictions) == len(references)
for i, (pred, gold) in enumerate(zip(predictions, references)):
processed_predictions[str(i)] = {'prediction': pred, 'gold': gold}
with tempfile.TemporaryDirectory() as tmp_dir:
tmp_out_path = osp.join(tmp_dir, f'ds1000_{self.lib}.json')
with open(tmp_out_path, 'w', encoding='utf-8') as json_file:
json.dump(processed_predictions,
json_file,
indent=4,
ensure_ascii=False)
succeed, output = self._code_eval_service(file_path=tmp_out_path)
if succeed:
if isinstance(output, str):
return json.loads(output)
elif isinstance(output, dict):
return output
else:
                result_file_path = osp.join('outputs',
                                            f'ds1000_{self.lib}.json')
                copyfile(tmp_out_path, result_file_path)
                ref_url = 'https://opencompass.readthedocs.io/en/latest/advanced_guides/code_eval_service.html'  # noqa
                raise Exception(
                    'Failed to call the code eval service in '
                    '`DS1000ServiceEvaluator`. The results have been saved '
                    f"to '{result_file_path}'. Check that your code eval "
                    'service is launched and reachable from this machine; '
                    'you can also get results directly with a `curl` '
                    f'command, see {ref_url}.'
                    f'\nError information: {output}')
def _code_eval_service(self, file_path: str) -> tuple:
"""Access the code eval service.
Args:
file_path (str): The file path to the file to be evaluated.
Returns:
tuple[bool, str]: Whether the access is successful and the output.
"""
exec_result = subprocess.run([
'curl', '-X', 'POST', '-F', f'file=@{file_path}',
f'{self.ip_address}:{self.port}/evaluate'
],
timeout=self.timeout,
capture_output=True)
if exec_result.returncode == 0 and re.match(
"\"{.*:.*}\"", exec_result.stdout.decode('utf-8')):
return True, json.loads(exec_result.stdout.decode('utf-8'))
else:
if exec_result.stderr:
try:
err = exec_result.stderr.decode()
except Exception:
err = exec_result.stderr
else:
try:
err = exec_result.stdout.decode()
except Exception:
err = exec_result.stdout
return False, err
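A minimal sketch of plugging the evaluator into a dataset eval config (hypothetical config; assumes `DS1000ServiceEvaluator` is exported from `opencompass.datasets`):

from opencompass.datasets import DS1000ServiceEvaluator

ds1000_eval_cfg = dict(
    evaluator=dict(
        type=DS1000ServiceEvaluator,
        lib='Pandas',            # must be one of _LIBRARY_NAME_LIST
        ip_address='localhost',  # host running the code eval service
        port=5000,
        timeout=180,
    ),
)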


@@ -93,6 +93,7 @@ def humaneval_postprocess(text: str) -> str:
if def_idx != -1:
text = text[max(text.find('\n', def_idx) + 1, 0):]
text = text.split('\n\n')[0]
text = text.lstrip('\n')
if text.strip().startswith('def'):
text = '\n'.join(text.split('\n')[1:])
if not text.startswith(' '):
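The new `lstrip` call guards against completions that open with a bare newline, which would otherwise defeat the indentation check at the end of this hunk; a small walk-through with a made-up completion:

text = '\n    return x + 1'
text = text.split('\n\n')[0]  # unchanged: no blank line inside
text = text.lstrip('\n')      # '    return x + 1'
# Without the lstrip, text.startswith(' ') would be False here even though
# the completion really is an indented function body.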


@@ -127,7 +127,9 @@ class MBPPEvaluator(BaseEvaluator):
predictions = [self._process_answer(pred) for pred in predictions]
result = {'pass': 0, 'timeout': 0, 'failed': 0, 'wrong_answer': 0}
for test_case, pred in zip(references, predictions):
details = {}
for index, (test_case, pred) in enumerate(zip(references,
predictions)):
programs = self._process_test(test_case, pred)
try:
# Add exec globals to prevent the exec to raise
@@ -136,15 +138,18 @@ class MBPPEvaluator(BaseEvaluator):
with swallow_io():
with time_limit(2):
exec(programs, exec_globals)
result['pass'] += 1
r = 'pass'
except TimeOutException:
result['timeout'] += 1
r = 'timeout'
except AssertionError:
result['wrong_answer'] += 1
r = 'wrong_answer'
except BaseException:
result['failed'] += 1
r = 'failed'
result[r] += 1
details[str(index)] = {'programs': programs, 'result': r}
result['score'] = result['pass'] / len(predictions) * 100
result['details'] = details
return result
def _process_answer(self, text):
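After this change the evaluator reports a per-sample breakdown alongside the aggregate counters; the dict returned by `score` looks roughly like this (values are made up):

# Illustrative shape of MBPPEvaluator.score's return value.
result = {
    'pass': 7, 'timeout': 1, 'failed': 1, 'wrong_answer': 1,
    'score': 70.0,  # pass / len(predictions) * 100
    'details': {
        '0': {'programs': '<assembled test program>', 'result': 'pass'},
        # ... one entry per (test_case, prediction) pair
    },
}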


@@ -147,26 +147,26 @@ class DefaultSummarizer:
if all(isinstance(dataset_abbr, (list, tuple)) for dataset_abbr in sg['subsets']):
group_metrics = [default_metric]
for dataset_abbr, metric in sg['subsets']:
scores.setdefault(default_metric, []).append(parsed_results[model_abbr][dataset_abbr][metric])
scores.setdefault(default_metric, {})[dataset_abbr] = parsed_results[model_abbr][dataset_abbr][metric]
eval_modes.append(dataset_eval_mode.get(dataset_abbr, 'unknown'))
else:
group_metrics = list(functools.reduce(lambda a, b: a & b, [set(dataset_metrics[dataset_abbr]) for dataset_abbr in sg['subsets']]))
if len(group_metrics) > 1:
for metric in group_metrics:
for dataset_abbr in sg['subsets']:
scores.setdefault(metric, []).append(parsed_results[model_abbr][dataset_abbr][metric])
scores.setdefault(metric, {})[dataset_abbr] = parsed_results[model_abbr][dataset_abbr][metric]
eval_modes.append(dataset_eval_mode.get(sg['subsets'][0], 'unknown'))
else:
group_metrics = [default_metric]
for dataset_abbr in sg['subsets']:
metric = dataset_metrics[dataset_abbr][0]
scores.setdefault(default_metric, []).append(parsed_results[model_abbr][dataset_abbr][metric])
scores.setdefault(default_metric, {})[dataset_abbr] = parsed_results[model_abbr][dataset_abbr][metric]
eval_modes.append(dataset_eval_mode.get(dataset_abbr, 'unknown'))
result = {}
for metric in scores:
if default_metric == 'standard_deviation':
avg = sum(scores[metric]) / len(scores[metric])
avg = sum(scores[metric].values()) / len(scores[metric])
                variance = sum((v - avg) ** 2 for v in scores[metric].values()) / len(scores[metric])
scores[metric] = result[metric] = math.sqrt(variance)
else:
@@ -174,7 +174,7 @@ class DefaultSummarizer:
numerator = sum(scores[metric][k] * sg['weights'][k] for k in sg['weights'])
denominator = sum(sg['weights'].values())
else:
numerator = sum(scores[metric])
numerator = sum(scores[metric].values())
denominator = len(scores[metric])
scores[metric] = result[metric] = numerator / denominator
eval_modes = list(set(eval_modes))
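The list-to-dict switch for `scores[metric]` (keyed by dataset abbreviation) is what lets the weighted branch above look scores up by name; a standalone sketch of that arithmetic with made-up numbers:

scores = {'accuracy': {'ds1000_Pandas': 40.0, 'ds1000_Numpy': 60.0}}
weights = {'ds1000_Pandas': 3, 'ds1000_Numpy': 1}
numerator = sum(scores['accuracy'][k] * weights[k] for k in weights)  # 180.0
denominator = sum(weights.values())                                   # 4
print(numerator / denominator)                                        # 45.0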


@@ -51,19 +51,53 @@ def first_capital_postprocess(text: str) -> str:
def first_option_postprocess(text: str, options: str) -> str:
"""Find first valid option for text."""
# yapf: disable
# flake8: noqa: W605
patterns = [
f'[Tt]he answer is [{options}]',
f'[Tt]he correct answer\s?(?:option)?\s?is [{options}]', # noqa
f'答案(?:选项)?是(.*?)[{options}]',
f'答案(?:选项)?为(.*?)[{options}]',
f'答案(?:选项)?选(.*?)[{options}]',
f'选项[{options}]是?正确',
f'选项[{options}]为?正确',
f'固选(.*?)[{options}]',
f'答案应该是(.*?)[{options}]',
f'(\s|^)[{options}][\s。,\.$]', # noqa
        f'答案是?\s?([{options}])',
f'答案是?\s?:([{options}])',
f'答案应该?是\s?([{options}])',
f'答案应该?选\s?([{options}])',
f'答案为\s?([{options}])',
f'答案选\s?([{options}])',
f'选择?\s?([{options}])',
f'只有选?项?\s?([{options}])\s?是?对',
f'只有选?项?\s?([{options}])\s?是?错',
f'只有选?项?\s?([{options}])\s?不?正确',
f'只有选?项?\s?([{options}])\s?错误',
f'说法不?对选?项?的?是\s?([{options}])',
f'说法不?正确选?项?的?是\s?([{options}])',
f'说法错误选?项?的?是\s?([{options}])',
f'([{options}])\s?是正确的',
f'([{options}])\s?是正确答案',
f'选项\s?([{options}])\s?正确',
f'所以答\s?([{options}])',
f'1.\s?([{options}])[.。$]?$',
f'所以\s?([{options}][.。$]?$)',
f'所有\s?([{options}][.。$]?$)',
f'[\s:,]([{options}])[。,,\.]?$',
f'[\s,:][故即]([{options}])[。\.]?$',
f'[\s,:]因此([{options}])[。\.]?$',
f'[是为。]\s?([{options}])[。\.]?$',
f'因此\s?([{options}])[。\.]?$',
f'显然\s?([{options}])[。\.]?$',
f'1.\s?(.*?)$',
f'答案是\s?(\S+)(?:。|$)',
f'答案应该是\s?(\S+)(?:。|$)',
f'答案为\s?(\S+)(?:。|$)',
f'(\s|^)[{options}][\s。,:\.$]',
f'[Tt]he answer is ([{options}])',
f'[Tt]he answer is option ([{options}])',
f'[Tt]he correct answer is ([{options}])',
f'[Tt]he correct answer is option ([{options}])',
f'[Tt]he answer to the question is ([{options}])',
f'([{options}]):',
f'(^|\s)[{options}](\s|$)',
f'[{options}]',
]
# flake8: noqa
# yapf: enable
regexes = [re.compile(pattern) for pattern in patterns]
for regex in regexes:
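Illustrative behaviour of the extended pattern list (made-up inputs; assumes the function returns the matched option letter, or an empty string when nothing matches):

from opencompass.utils.text_postprocessors import first_option_postprocess

first_option_postprocess('The correct answer is option B.', 'ABCD')  # -> 'B'
first_option_postprocess('答案是 C', 'ABCD')                          # -> 'C'
first_option_postprocess('no option mentioned here', 'ABCD')          # -> ''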


@@ -84,20 +84,17 @@ def print_prompts(model_cfg, dataset_cfg, count=1):
if infer_cfg.inferencer.type == PPLInferencer:
labels = retriever.get_labels(ice_template=ice_template,
prompt_template=prompt_template)
ice = [
retriever.generate_ice(ice_idx_list[_idx],
ice_template=ice_template)
for _idx in range(len(ice_idx_list))
]
ice = retriever.generate_ice(ice_idx_list[idx],
ice_template=ice_template)
print('-' * 100)
print('ICE Template:')
print('-' * 100)
print(ice[0])
print(ice)
print('-' * 100)
for label in labels:
prompt = retriever.generate_label_prompt(
idx,
ice[idx],
ice,
label,
ice_template=ice_template,
prompt_template=prompt_template,
@@ -111,11 +108,11 @@ def print_prompts(model_cfg, dataset_cfg, count=1):
print(f'Truncating ice {num_ice} -> {num_ice - 1}',
f'Number of tokens: {prompt_token_num} -> ...')
ice_idx_list[idx] = ice_idx_list[idx][:-1]
ice[idx] = retriever.generate_ice(
ice_idx_list[idx], ice_template=ice_template)
ice = retriever.generate_ice(ice_idx_list[idx],
ice_template=ice_template)
prompt = retriever.generate_label_prompt(
idx,
ice[idx],
ice,
label,
ice_template=ice_template,
prompt_template=prompt_template)