[Sync] some renaming (#641)

Author: Fengzhe Zhou, 2023-11-27 16:06:49 +08:00 (committed by GitHub)
Parent: 68c4c1ef86
Commit: 9083dea683
28 changed files with 1102 additions and 564 deletions

View File

@@ -6,139 +6,58 @@ from opencompass.datasets import CEvalDataset
from opencompass.utils.text_postprocessors import first_capital_postprocess
ceval_subject_mapping = {
"computer_network":
["Computer Network", "\u8ba1\u7b97\u673a\u7f51\u7edc", "STEM"],
"operating_system":
["Operating System", "\u64cd\u4f5c\u7cfb\u7edf", "STEM"],
"computer_architecture":
["Computer Architecture", "\u8ba1\u7b97\u673a\u7ec4\u6210", "STEM"],
"college_programming":
["College Programming", "\u5927\u5b66\u7f16\u7a0b", "STEM"],
"college_physics": ["College Physics", "\u5927\u5b66\u7269\u7406", "STEM"],
"college_chemistry":
["College Chemistry", "\u5927\u5b66\u5316\u5b66", "STEM"],
"advanced_mathematics":
["Advanced Mathematics", "\u9ad8\u7b49\u6570\u5b66", "STEM"],
"probability_and_statistics":
["Probability and Statistics", "\u6982\u7387\u7edf\u8ba1", "STEM"],
"discrete_mathematics":
["Discrete Mathematics", "\u79bb\u6563\u6570\u5b66", "STEM"],
"electrical_engineer": [
"Electrical Engineer", "\u6ce8\u518c\u7535\u6c14\u5de5\u7a0b\u5e08",
"STEM"
],
"metrology_engineer":
["Metrology Engineer", "\u6ce8\u518c\u8ba1\u91cf\u5e08", "STEM"],
"high_school_mathematics":
["High School Mathematics", "\u9ad8\u4e2d\u6570\u5b66", "STEM"],
"high_school_physics":
["High School Physics", "\u9ad8\u4e2d\u7269\u7406", "STEM"],
"high_school_chemistry":
["High School Chemistry", "\u9ad8\u4e2d\u5316\u5b66", "STEM"],
"high_school_biology": [
"High School Biology", "\u9ad8\u4e2d\u751f\u7269", "STEM"
],
"middle_school_mathematics": [
"Middle School Mathematics", "\u521d\u4e2d\u6570\u5b66", "STEM"
],
"middle_school_biology": [
"Middle School Biology", "\u521d\u4e2d\u751f\u7269", "STEM"
],
"middle_school_physics": [
"Middle School Physics", "\u521d\u4e2d\u7269\u7406", "STEM"
],
"middle_school_chemistry": [
"Middle School Chemistry", "\u521d\u4e2d\u5316\u5b66", "STEM"
],
"veterinary_medicine": [
"Veterinary Medicine", "\u517d\u533b\u5b66", "STEM"
],
"college_economics": [
"College Economics", "\u5927\u5b66\u7ecf\u6d4e\u5b66", "Social Science"
],
"business_administration": [
"Business Administration", "\u5de5\u5546\u7ba1\u7406", "Social Science"
],
"marxism": [
"Marxism", "\u9a6c\u514b\u601d\u4e3b\u4e49\u57fa\u672c\u539f\u7406",
"Social Science"
],
"mao_zedong_thought": [
"Mao Zedong Thought",
"\u6bdb\u6cfd\u4e1c\u601d\u60f3\u548c\u4e2d\u56fd\u7279\u8272\u793e\u4f1a\u4e3b\u4e49\u7406\u8bba\u4f53\u7cfb\u6982\u8bba",
"Social Science"
],
"education_science": [
"Education Science", "\u6559\u80b2\u5b66", "Social Science"
],
"teacher_qualification": [
"Teacher Qualification", "\u6559\u5e08\u8d44\u683c", "Social Science"
],
"high_school_politics": [
"High School Politics", "\u9ad8\u4e2d\u653f\u6cbb", "Social Science"
],
"high_school_geography": [
"High School Geography", "\u9ad8\u4e2d\u5730\u7406", "Social Science"
],
"middle_school_politics": [
"Middle School Politics", "\u521d\u4e2d\u653f\u6cbb", "Social Science"
],
"middle_school_geography": [
"Middle School Geography", "\u521d\u4e2d\u5730\u7406", "Social Science"
],
"modern_chinese_history":
["Modern Chinese History", "\u8fd1\u4ee3\u53f2\u7eb2\u8981", "Humanities"],
"ideological_and_moral_cultivation": [
"Ideological and Moral Cultivation",
"\u601d\u60f3\u9053\u5fb7\u4fee\u517b\u4e0e\u6cd5\u5f8b\u57fa\u7840",
"Humanities"
],
"logic": ["Logic", "\u903b\u8f91\u5b66", "Humanities"],
"law": ["Law", "\u6cd5\u5b66", "Humanities"],
"chinese_language_and_literature": [
"Chinese Language and Literature",
"\u4e2d\u56fd\u8bed\u8a00\u6587\u5b66", "Humanities"
],
"art_studies": ["Art Studies", "\u827a\u672f\u5b66", "Humanities"],
"professional_tour_guide": [
"Professional Tour Guide", "\u5bfc\u6e38\u8d44\u683c", "Humanities"
],
"legal_professional": [
"Legal Professional", "\u6cd5\u5f8b\u804c\u4e1a\u8d44\u683c",
"Humanities"
],
"high_school_chinese": [
"High School Chinese", "\u9ad8\u4e2d\u8bed\u6587", "Humanities"
],
"high_school_history": [
"High School History", "\u9ad8\u4e2d\u5386\u53f2", "Humanities"
],
"middle_school_history": [
"Middle School History", "\u521d\u4e2d\u5386\u53f2", "Humanities"
],
"civil_servant": ["Civil Servant", "\u516c\u52a1\u5458", "Other"],
"sports_science": ["Sports Science", "\u4f53\u80b2\u5b66", "Other"],
"plant_protection": [
"Plant Protection", "\u690d\u7269\u4fdd\u62a4", "Other"
],
"basic_medicine": ["Basic Medicine", "\u57fa\u7840\u533b\u5b66", "Other"],
"clinical_medicine": [
"Clinical Medicine", "\u4e34\u5e8a\u533b\u5b66", "Other"
],
"urban_and_rural_planner": [
"Urban and Rural Planner",
"\u6ce8\u518c\u57ce\u4e61\u89c4\u5212\u5e08", "Other"
],
"accountant": ["Accountant", "\u6ce8\u518c\u4f1a\u8ba1\u5e08", "Other"],
"fire_engineer": [
"Fire Engineer", "\u6ce8\u518c\u6d88\u9632\u5de5\u7a0b\u5e08", "Other"
],
"environmental_impact_assessment_engineer": [
"Environmental Impact Assessment Engineer",
"\u73af\u5883\u5f71\u54cd\u8bc4\u4ef7\u5de5\u7a0b\u5e08", "Other"
],
"tax_accountant": ["Tax Accountant", "\u7a0e\u52a1\u5e08", "Other"],
"physician": ["Physician", "\u533b\u5e08\u8d44\u683c", "Other"]
'computer_network': ['Computer Network', '计算机网络', 'STEM'],
'operating_system': ['Operating System', '操作系统', 'STEM'],
'computer_architecture': ['Computer Architecture', '计算机组成', 'STEM'],
'college_programming': ['College Programming', '大学编程', 'STEM'],
'college_physics': ['College Physics', '大学物理', 'STEM'],
'college_chemistry': ['College Chemistry', '大学化学', 'STEM'],
'advanced_mathematics': ['Advanced Mathematics', '高等数学', 'STEM'],
'probability_and_statistics': ['Probability and Statistics', '概率统计', 'STEM'],
'discrete_mathematics': ['Discrete Mathematics', '离散数学', 'STEM'],
'electrical_engineer': ['Electrical Engineer', '注册电气工程师', 'STEM'],
'metrology_engineer': ['Metrology Engineer', '注册计量师', 'STEM'],
'high_school_mathematics': ['High School Mathematics', '高中数学', 'STEM'],
'high_school_physics': ['High School Physics', '高中物理', 'STEM'],
'high_school_chemistry': ['High School Chemistry', '高中化学', 'STEM'],
'high_school_biology': ['High School Biology', '高中生物', 'STEM'],
'middle_school_mathematics': ['Middle School Mathematics', '初中数学', 'STEM'],
'middle_school_biology': ['Middle School Biology', '初中生物', 'STEM'],
'middle_school_physics': ['Middle School Physics', '初中物理', 'STEM'],
'middle_school_chemistry': ['Middle School Chemistry', '初中化学', 'STEM'],
'veterinary_medicine': ['Veterinary Medicine', '兽医学', 'STEM'],
'college_economics': ['College Economics', '大学经济学', 'Social Science'],
'business_administration': ['Business Administration', '工商管理', 'Social Science'],
'marxism': ['Marxism', '马克思主义基本原理', 'Social Science'],
'mao_zedong_thought': ['Mao Zedong Thought', '毛泽东思想和中国特色社会主义理论体系概论', 'Social Science'],
'education_science': ['Education Science', '教育学', 'Social Science'],
'teacher_qualification': ['Teacher Qualification', '教师资格', 'Social Science'],
'high_school_politics': ['High School Politics', '高中政治', 'Social Science'],
'high_school_geography': ['High School Geography', '高中地理', 'Social Science'],
'middle_school_politics': ['Middle School Politics', '初中政治', 'Social Science'],
'middle_school_geography': ['Middle School Geography', '初中地理', 'Social Science'],
'modern_chinese_history': ['Modern Chinese History', '近代史纲要', 'Humanities'],
'ideological_and_moral_cultivation': ['Ideological and Moral Cultivation', '思想道德修养与法律基础', 'Humanities'],
'logic': ['Logic', '逻辑学', 'Humanities'],
'law': ['Law', '法学', 'Humanities'],
'chinese_language_and_literature': ['Chinese Language and Literature', '中国语言文学', 'Humanities'],
'art_studies': ['Art Studies', '艺术学', 'Humanities'],
'professional_tour_guide': ['Professional Tour Guide', '导游资格', 'Humanities'],
'legal_professional': ['Legal Professional', '法律职业资格', 'Humanities'],
'high_school_chinese': ['High School Chinese', '高中语文', 'Humanities'],
'high_school_history': ['High School History', '高中历史', 'Humanities'],
'middle_school_history': ['Middle School History', '初中历史', 'Humanities'],
'civil_servant': ['Civil Servant', '公务员', 'Other'],
'sports_science': ['Sports Science', '体育学', 'Other'],
'plant_protection': ['Plant Protection', '植物保护', 'Other'],
'basic_medicine': ['Basic Medicine', '基础医学', 'Other'],
'clinical_medicine': ['Clinical Medicine', '临床医学', 'Other'],
'urban_and_rural_planner': ['Urban and Rural Planner', '注册城乡规划师', 'Other'],
'accountant': ['Accountant', '注册会计师', 'Other'],
'fire_engineer': ['Fire Engineer', '注册消防工程师', 'Other'],
'environmental_impact_assessment_engineer': ['Environmental Impact Assessment Engineer', '环境影响评价工程师', 'Other'],
'tax_accountant': ['Tax Accountant', '税务师', 'Other'],
'physician': ['Physician', '医师资格', 'Other'],
}
ceval_all_sets = list(ceval_subject_mapping.keys())
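Each entry in ceval_subject_mapping is an [english_name, chinese_name, category] triple; the per-subject configs later in this commit read the Chinese name by index (ceval_subject_mapping[_name][1]). A minimal standalone sketch of that access pattern, using a hypothetical two-entry mapping:

mapping = {
    'computer_network': ['Computer Network', '计算机网络', 'STEM'],
    'law': ['Law', '法学', 'Humanities'],
}
for name, (en_name, ch_name, category) in mapping.items():
    # ch_name feeds the Chinese prompt template; category drives summary grouping
    print(f'{name}: {en_name} / {ch_name} ({category})')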

View File

@@ -6,139 +6,58 @@ from opencompass.datasets import CEvalDataset
from opencompass.utils.text_postprocessors import first_capital_postprocess
ceval_subject_mapping = {
"computer_network":
["Computer Network", "\u8ba1\u7b97\u673a\u7f51\u7edc", "STEM"],
"operating_system":
["Operating System", "\u64cd\u4f5c\u7cfb\u7edf", "STEM"],
"computer_architecture":
["Computer Architecture", "\u8ba1\u7b97\u673a\u7ec4\u6210", "STEM"],
"college_programming":
["College Programming", "\u5927\u5b66\u7f16\u7a0b", "STEM"],
"college_physics": ["College Physics", "\u5927\u5b66\u7269\u7406", "STEM"],
"college_chemistry":
["College Chemistry", "\u5927\u5b66\u5316\u5b66", "STEM"],
"advanced_mathematics":
["Advanced Mathematics", "\u9ad8\u7b49\u6570\u5b66", "STEM"],
"probability_and_statistics":
["Probability and Statistics", "\u6982\u7387\u7edf\u8ba1", "STEM"],
"discrete_mathematics":
["Discrete Mathematics", "\u79bb\u6563\u6570\u5b66", "STEM"],
"electrical_engineer": [
"Electrical Engineer", "\u6ce8\u518c\u7535\u6c14\u5de5\u7a0b\u5e08",
"STEM"
],
"metrology_engineer":
["Metrology Engineer", "\u6ce8\u518c\u8ba1\u91cf\u5e08", "STEM"],
"high_school_mathematics":
["High School Mathematics", "\u9ad8\u4e2d\u6570\u5b66", "STEM"],
"high_school_physics":
["High School Physics", "\u9ad8\u4e2d\u7269\u7406", "STEM"],
"high_school_chemistry":
["High School Chemistry", "\u9ad8\u4e2d\u5316\u5b66", "STEM"],
"high_school_biology": [
"High School Biology", "\u9ad8\u4e2d\u751f\u7269", "STEM"
],
"middle_school_mathematics": [
"Middle School Mathematics", "\u521d\u4e2d\u6570\u5b66", "STEM"
],
"middle_school_biology": [
"Middle School Biology", "\u521d\u4e2d\u751f\u7269", "STEM"
],
"middle_school_physics": [
"Middle School Physics", "\u521d\u4e2d\u7269\u7406", "STEM"
],
"middle_school_chemistry": [
"Middle School Chemistry", "\u521d\u4e2d\u5316\u5b66", "STEM"
],
"veterinary_medicine": [
"Veterinary Medicine", "\u517d\u533b\u5b66", "STEM"
],
"college_economics": [
"College Economics", "\u5927\u5b66\u7ecf\u6d4e\u5b66", "Social Science"
],
"business_administration": [
"Business Administration", "\u5de5\u5546\u7ba1\u7406", "Social Science"
],
"marxism": [
"Marxism", "\u9a6c\u514b\u601d\u4e3b\u4e49\u57fa\u672c\u539f\u7406",
"Social Science"
],
"mao_zedong_thought": [
"Mao Zedong Thought",
"\u6bdb\u6cfd\u4e1c\u601d\u60f3\u548c\u4e2d\u56fd\u7279\u8272\u793e\u4f1a\u4e3b\u4e49\u7406\u8bba\u4f53\u7cfb\u6982\u8bba",
"Social Science"
],
"education_science": [
"Education Science", "\u6559\u80b2\u5b66", "Social Science"
],
"teacher_qualification": [
"Teacher Qualification", "\u6559\u5e08\u8d44\u683c", "Social Science"
],
"high_school_politics": [
"High School Politics", "\u9ad8\u4e2d\u653f\u6cbb", "Social Science"
],
"high_school_geography": [
"High School Geography", "\u9ad8\u4e2d\u5730\u7406", "Social Science"
],
"middle_school_politics": [
"Middle School Politics", "\u521d\u4e2d\u653f\u6cbb", "Social Science"
],
"middle_school_geography": [
"Middle School Geography", "\u521d\u4e2d\u5730\u7406", "Social Science"
],
"modern_chinese_history":
["Modern Chinese History", "\u8fd1\u4ee3\u53f2\u7eb2\u8981", "Humanities"],
"ideological_and_moral_cultivation": [
"Ideological and Moral Cultivation",
"\u601d\u60f3\u9053\u5fb7\u4fee\u517b\u4e0e\u6cd5\u5f8b\u57fa\u7840",
"Humanities"
],
"logic": ["Logic", "\u903b\u8f91\u5b66", "Humanities"],
"law": ["Law", "\u6cd5\u5b66", "Humanities"],
"chinese_language_and_literature": [
"Chinese Language and Literature",
"\u4e2d\u56fd\u8bed\u8a00\u6587\u5b66", "Humanities"
],
"art_studies": ["Art Studies", "\u827a\u672f\u5b66", "Humanities"],
"professional_tour_guide": [
"Professional Tour Guide", "\u5bfc\u6e38\u8d44\u683c", "Humanities"
],
"legal_professional": [
"Legal Professional", "\u6cd5\u5f8b\u804c\u4e1a\u8d44\u683c",
"Humanities"
],
"high_school_chinese": [
"High School Chinese", "\u9ad8\u4e2d\u8bed\u6587", "Humanities"
],
"high_school_history": [
"High School History", "\u9ad8\u4e2d\u5386\u53f2", "Humanities"
],
"middle_school_history": [
"Middle School History", "\u521d\u4e2d\u5386\u53f2", "Humanities"
],
"civil_servant": ["Civil Servant", "\u516c\u52a1\u5458", "Other"],
"sports_science": ["Sports Science", "\u4f53\u80b2\u5b66", "Other"],
"plant_protection": [
"Plant Protection", "\u690d\u7269\u4fdd\u62a4", "Other"
],
"basic_medicine": ["Basic Medicine", "\u57fa\u7840\u533b\u5b66", "Other"],
"clinical_medicine": [
"Clinical Medicine", "\u4e34\u5e8a\u533b\u5b66", "Other"
],
"urban_and_rural_planner": [
"Urban and Rural Planner",
"\u6ce8\u518c\u57ce\u4e61\u89c4\u5212\u5e08", "Other"
],
"accountant": ["Accountant", "\u6ce8\u518c\u4f1a\u8ba1\u5e08", "Other"],
"fire_engineer": [
"Fire Engineer", "\u6ce8\u518c\u6d88\u9632\u5de5\u7a0b\u5e08", "Other"
],
"environmental_impact_assessment_engineer": [
"Environmental Impact Assessment Engineer",
"\u73af\u5883\u5f71\u54cd\u8bc4\u4ef7\u5de5\u7a0b\u5e08", "Other"
],
"tax_accountant": ["Tax Accountant", "\u7a0e\u52a1\u5e08", "Other"],
"physician": ["Physician", "\u533b\u5e08\u8d44\u683c", "Other"]
'computer_network': ['Computer Network', '计算机网络', 'STEM'],
'operating_system': ['Operating System', '操作系统', 'STEM'],
'computer_architecture': ['Computer Architecture', '计算机组成', 'STEM'],
'college_programming': ['College Programming', '大学编程', 'STEM'],
'college_physics': ['College Physics', '大学物理', 'STEM'],
'college_chemistry': ['College Chemistry', '大学化学', 'STEM'],
'advanced_mathematics': ['Advanced Mathematics', '高等数学', 'STEM'],
'probability_and_statistics': ['Probability and Statistics', '概率统计', 'STEM'],
'discrete_mathematics': ['Discrete Mathematics', '离散数学', 'STEM'],
'electrical_engineer': ['Electrical Engineer', '注册电气工程师', 'STEM'],
'metrology_engineer': ['Metrology Engineer', '注册计量师', 'STEM'],
'high_school_mathematics': ['High School Mathematics', '高中数学', 'STEM'],
'high_school_physics': ['High School Physics', '高中物理', 'STEM'],
'high_school_chemistry': ['High School Chemistry', '高中化学', 'STEM'],
'high_school_biology': ['High School Biology', '高中生物', 'STEM'],
'middle_school_mathematics': ['Middle School Mathematics', '初中数学', 'STEM'],
'middle_school_biology': ['Middle School Biology', '初中生物', 'STEM'],
'middle_school_physics': ['Middle School Physics', '初中物理', 'STEM'],
'middle_school_chemistry': ['Middle School Chemistry', '初中化学', 'STEM'],
'veterinary_medicine': ['Veterinary Medicine', '兽医学', 'STEM'],
'college_economics': ['College Economics', '大学经济学', 'Social Science'],
'business_administration': ['Business Administration', '工商管理', 'Social Science'],
'marxism': ['Marxism', '马克思主义基本原理', 'Social Science'],
'mao_zedong_thought': ['Mao Zedong Thought', '毛泽东思想和中国特色社会主义理论体系概论', 'Social Science'],
'education_science': ['Education Science', '教育学', 'Social Science'],
'teacher_qualification': ['Teacher Qualification', '教师资格', 'Social Science'],
'high_school_politics': ['High School Politics', '高中政治', 'Social Science'],
'high_school_geography': ['High School Geography', '高中地理', 'Social Science'],
'middle_school_politics': ['Middle School Politics', '初中政治', 'Social Science'],
'middle_school_geography': ['Middle School Geography', '初中地理', 'Social Science'],
'modern_chinese_history': ['Modern Chinese History', '近代史纲要', 'Humanities'],
'ideological_and_moral_cultivation': ['Ideological and Moral Cultivation', '思想道德修养与法律基础', 'Humanities'],
'logic': ['Logic', '逻辑学', 'Humanities'],
'law': ['Law', '法学', 'Humanities'],
'chinese_language_and_literature': ['Chinese Language and Literature', '中国语言文学', 'Humanities'],
'art_studies': ['Art Studies', '艺术学', 'Humanities'],
'professional_tour_guide': ['Professional Tour Guide', '导游资格', 'Humanities'],
'legal_professional': ['Legal Professional', '法律职业资格', 'Humanities'],
'high_school_chinese': ['High School Chinese', '高中语文', 'Humanities'],
'high_school_history': ['High School History', '高中历史', 'Humanities'],
'middle_school_history': ['Middle School History', '初中历史', 'Humanities'],
'civil_servant': ['Civil Servant', '公务员', 'Other'],
'sports_science': ['Sports Science', '体育学', 'Other'],
'plant_protection': ['Plant Protection', '植物保护', 'Other'],
'basic_medicine': ['Basic Medicine', '基础医学', 'Other'],
'clinical_medicine': ['Clinical Medicine', '临床医学', 'Other'],
'urban_and_rural_planner': ['Urban and Rural Planner', '注册城乡规划师', 'Other'],
'accountant': ['Accountant', '注册会计师', 'Other'],
'fire_engineer': ['Fire Engineer', '注册消防工程师', 'Other'],
'environmental_impact_assessment_engineer': ['Environmental Impact Assessment Engineer', '环境影响评价工程师', 'Other'],
'tax_accountant': ['Tax Accountant', '税务师', 'Other'],
'physician': ['Physician', '医师资格', 'Other'],
}
ceval_all_sets = list(ceval_subject_mapping.keys())

View File

@@ -5,139 +5,58 @@ from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CEvalDataset
ceval_subject_mapping = {
"computer_network":
["Computer Network", "\u8ba1\u7b97\u673a\u7f51\u7edc", "STEM"],
"operating_system":
["Operating System", "\u64cd\u4f5c\u7cfb\u7edf", "STEM"],
"computer_architecture":
["Computer Architecture", "\u8ba1\u7b97\u673a\u7ec4\u6210", "STEM"],
"college_programming":
["College Programming", "\u5927\u5b66\u7f16\u7a0b", "STEM"],
"college_physics": ["College Physics", "\u5927\u5b66\u7269\u7406", "STEM"],
"college_chemistry":
["College Chemistry", "\u5927\u5b66\u5316\u5b66", "STEM"],
"advanced_mathematics":
["Advanced Mathematics", "\u9ad8\u7b49\u6570\u5b66", "STEM"],
"probability_and_statistics":
["Probability and Statistics", "\u6982\u7387\u7edf\u8ba1", "STEM"],
"discrete_mathematics":
["Discrete Mathematics", "\u79bb\u6563\u6570\u5b66", "STEM"],
"electrical_engineer": [
"Electrical Engineer", "\u6ce8\u518c\u7535\u6c14\u5de5\u7a0b\u5e08",
"STEM"
],
"metrology_engineer":
["Metrology Engineer", "\u6ce8\u518c\u8ba1\u91cf\u5e08", "STEM"],
"high_school_mathematics":
["High School Mathematics", "\u9ad8\u4e2d\u6570\u5b66", "STEM"],
"high_school_physics":
["High School Physics", "\u9ad8\u4e2d\u7269\u7406", "STEM"],
"high_school_chemistry":
["High School Chemistry", "\u9ad8\u4e2d\u5316\u5b66", "STEM"],
"high_school_biology": [
"High School Biology", "\u9ad8\u4e2d\u751f\u7269", "STEM"
],
"middle_school_mathematics": [
"Middle School Mathematics", "\u521d\u4e2d\u6570\u5b66", "STEM"
],
"middle_school_biology": [
"Middle School Biology", "\u521d\u4e2d\u751f\u7269", "STEM"
],
"middle_school_physics": [
"Middle School Physics", "\u521d\u4e2d\u7269\u7406", "STEM"
],
"middle_school_chemistry": [
"Middle School Chemistry", "\u521d\u4e2d\u5316\u5b66", "STEM"
],
"veterinary_medicine": [
"Veterinary Medicine", "\u517d\u533b\u5b66", "STEM"
],
"college_economics": [
"College Economics", "\u5927\u5b66\u7ecf\u6d4e\u5b66", "Social Science"
],
"business_administration": [
"Business Administration", "\u5de5\u5546\u7ba1\u7406", "Social Science"
],
"marxism": [
"Marxism", "\u9a6c\u514b\u601d\u4e3b\u4e49\u57fa\u672c\u539f\u7406",
"Social Science"
],
"mao_zedong_thought": [
"Mao Zedong Thought",
"\u6bdb\u6cfd\u4e1c\u601d\u60f3\u548c\u4e2d\u56fd\u7279\u8272\u793e\u4f1a\u4e3b\u4e49\u7406\u8bba\u4f53\u7cfb\u6982\u8bba",
"Social Science"
],
"education_science": [
"Education Science", "\u6559\u80b2\u5b66", "Social Science"
],
"teacher_qualification": [
"Teacher Qualification", "\u6559\u5e08\u8d44\u683c", "Social Science"
],
"high_school_politics": [
"High School Politics", "\u9ad8\u4e2d\u653f\u6cbb", "Social Science"
],
"high_school_geography": [
"High School Geography", "\u9ad8\u4e2d\u5730\u7406", "Social Science"
],
"middle_school_politics": [
"Middle School Politics", "\u521d\u4e2d\u653f\u6cbb", "Social Science"
],
"middle_school_geography": [
"Middle School Geography", "\u521d\u4e2d\u5730\u7406", "Social Science"
],
"modern_chinese_history":
["Modern Chinese History", "\u8fd1\u4ee3\u53f2\u7eb2\u8981", "Humanities"],
"ideological_and_moral_cultivation": [
"Ideological and Moral Cultivation",
"\u601d\u60f3\u9053\u5fb7\u4fee\u517b\u4e0e\u6cd5\u5f8b\u57fa\u7840",
"Humanities"
],
"logic": ["Logic", "\u903b\u8f91\u5b66", "Humanities"],
"law": ["Law", "\u6cd5\u5b66", "Humanities"],
"chinese_language_and_literature": [
"Chinese Language and Literature",
"\u4e2d\u56fd\u8bed\u8a00\u6587\u5b66", "Humanities"
],
"art_studies": ["Art Studies", "\u827a\u672f\u5b66", "Humanities"],
"professional_tour_guide": [
"Professional Tour Guide", "\u5bfc\u6e38\u8d44\u683c", "Humanities"
],
"legal_professional": [
"Legal Professional", "\u6cd5\u5f8b\u804c\u4e1a\u8d44\u683c",
"Humanities"
],
"high_school_chinese": [
"High School Chinese", "\u9ad8\u4e2d\u8bed\u6587", "Humanities"
],
"high_school_history": [
"High School History", "\u9ad8\u4e2d\u5386\u53f2", "Humanities"
],
"middle_school_history": [
"Middle School History", "\u521d\u4e2d\u5386\u53f2", "Humanities"
],
"civil_servant": ["Civil Servant", "\u516c\u52a1\u5458", "Other"],
"sports_science": ["Sports Science", "\u4f53\u80b2\u5b66", "Other"],
"plant_protection": [
"Plant Protection", "\u690d\u7269\u4fdd\u62a4", "Other"
],
"basic_medicine": ["Basic Medicine", "\u57fa\u7840\u533b\u5b66", "Other"],
"clinical_medicine": [
"Clinical Medicine", "\u4e34\u5e8a\u533b\u5b66", "Other"
],
"urban_and_rural_planner": [
"Urban and Rural Planner",
"\u6ce8\u518c\u57ce\u4e61\u89c4\u5212\u5e08", "Other"
],
"accountant": ["Accountant", "\u6ce8\u518c\u4f1a\u8ba1\u5e08", "Other"],
"fire_engineer": [
"Fire Engineer", "\u6ce8\u518c\u6d88\u9632\u5de5\u7a0b\u5e08", "Other"
],
"environmental_impact_assessment_engineer": [
"Environmental Impact Assessment Engineer",
"\u73af\u5883\u5f71\u54cd\u8bc4\u4ef7\u5de5\u7a0b\u5e08", "Other"
],
"tax_accountant": ["Tax Accountant", "\u7a0e\u52a1\u5e08", "Other"],
"physician": ["Physician", "\u533b\u5e08\u8d44\u683c", "Other"]
'computer_network': ['Computer Network', '计算机网络', 'STEM'],
'operating_system': ['Operating System', '操作系统', 'STEM'],
'computer_architecture': ['Computer Architecture', '计算机组成', 'STEM'],
'college_programming': ['College Programming', '大学编程', 'STEM'],
'college_physics': ['College Physics', '大学物理', 'STEM'],
'college_chemistry': ['College Chemistry', '大学化学', 'STEM'],
'advanced_mathematics': ['Advanced Mathematics', '高等数学', 'STEM'],
'probability_and_statistics': ['Probability and Statistics', '概率统计', 'STEM'],
'discrete_mathematics': ['Discrete Mathematics', '离散数学', 'STEM'],
'electrical_engineer': ['Electrical Engineer', '注册电气工程师', 'STEM'],
'metrology_engineer': ['Metrology Engineer', '注册计量师', 'STEM'],
'high_school_mathematics': ['High School Mathematics', '高中数学', 'STEM'],
'high_school_physics': ['High School Physics', '高中物理', 'STEM'],
'high_school_chemistry': ['High School Chemistry', '高中化学', 'STEM'],
'high_school_biology': ['High School Biology', '高中生物', 'STEM'],
'middle_school_mathematics': ['Middle School Mathematics', '初中数学', 'STEM'],
'middle_school_biology': ['Middle School Biology', '初中生物', 'STEM'],
'middle_school_physics': ['Middle School Physics', '初中物理', 'STEM'],
'middle_school_chemistry': ['Middle School Chemistry', '初中化学', 'STEM'],
'veterinary_medicine': ['Veterinary Medicine', '兽医学', 'STEM'],
'college_economics': ['College Economics', '大学经济学', 'Social Science'],
'business_administration': ['Business Administration', '工商管理', 'Social Science'],
'marxism': ['Marxism', '马克思主义基本原理', 'Social Science'],
'mao_zedong_thought': ['Mao Zedong Thought', '毛泽东思想和中国特色社会主义理论体系概论', 'Social Science'],
'education_science': ['Education Science', '教育学', 'Social Science'],
'teacher_qualification': ['Teacher Qualification', '教师资格', 'Social Science'],
'high_school_politics': ['High School Politics', '高中政治', 'Social Science'],
'high_school_geography': ['High School Geography', '高中地理', 'Social Science'],
'middle_school_politics': ['Middle School Politics', '初中政治', 'Social Science'],
'middle_school_geography': ['Middle School Geography', '初中地理', 'Social Science'],
'modern_chinese_history': ['Modern Chinese History', '近代史纲要', 'Humanities'],
'ideological_and_moral_cultivation': ['Ideological and Moral Cultivation', '思想道德修养与法律基础', 'Humanities'],
'logic': ['Logic', '逻辑学', 'Humanities'],
'law': ['Law', '法学', 'Humanities'],
'chinese_language_and_literature': ['Chinese Language and Literature', '中国语言文学', 'Humanities'],
'art_studies': ['Art Studies', '艺术学', 'Humanities'],
'professional_tour_guide': ['Professional Tour Guide', '导游资格', 'Humanities'],
'legal_professional': ['Legal Professional', '法律职业资格', 'Humanities'],
'high_school_chinese': ['High School Chinese', '高中语文', 'Humanities'],
'high_school_history': ['High School History', '高中历史', 'Humanities'],
'middle_school_history': ['Middle School History', '初中历史', 'Humanities'],
'civil_servant': ['Civil Servant', '公务员', 'Other'],
'sports_science': ['Sports Science', '体育学', 'Other'],
'plant_protection': ['Plant Protection', '植物保护', 'Other'],
'basic_medicine': ['Basic Medicine', '基础医学', 'Other'],
'clinical_medicine': ['Clinical Medicine', '临床医学', 'Other'],
'urban_and_rural_planner': ['Urban and Rural Planner', '注册城乡规划师', 'Other'],
'accountant': ['Accountant', '注册会计师', 'Other'],
'fire_engineer': ['Fire Engineer', '注册消防工程师', 'Other'],
'environmental_impact_assessment_engineer': ['Environmental Impact Assessment Engineer', '环境影响评价工程师', 'Other'],
'tax_accountant': ['Tax Accountant', '税务师', 'Other'],
'physician': ['Physician', '医师资格', 'Other'],
}
ceval_all_sets = list(ceval_subject_mapping.keys())

View File

@@ -5,139 +5,58 @@ from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CEvalDataset
ceval_subject_mapping = {
"computer_network":
["Computer Network", "\u8ba1\u7b97\u673a\u7f51\u7edc", "STEM"],
"operating_system":
["Operating System", "\u64cd\u4f5c\u7cfb\u7edf", "STEM"],
"computer_architecture":
["Computer Architecture", "\u8ba1\u7b97\u673a\u7ec4\u6210", "STEM"],
"college_programming":
["College Programming", "\u5927\u5b66\u7f16\u7a0b", "STEM"],
"college_physics": ["College Physics", "\u5927\u5b66\u7269\u7406", "STEM"],
"college_chemistry":
["College Chemistry", "\u5927\u5b66\u5316\u5b66", "STEM"],
"advanced_mathematics":
["Advanced Mathematics", "\u9ad8\u7b49\u6570\u5b66", "STEM"],
"probability_and_statistics":
["Probability and Statistics", "\u6982\u7387\u7edf\u8ba1", "STEM"],
"discrete_mathematics":
["Discrete Mathematics", "\u79bb\u6563\u6570\u5b66", "STEM"],
"electrical_engineer": [
"Electrical Engineer", "\u6ce8\u518c\u7535\u6c14\u5de5\u7a0b\u5e08",
"STEM"
],
"metrology_engineer":
["Metrology Engineer", "\u6ce8\u518c\u8ba1\u91cf\u5e08", "STEM"],
"high_school_mathematics":
["High School Mathematics", "\u9ad8\u4e2d\u6570\u5b66", "STEM"],
"high_school_physics":
["High School Physics", "\u9ad8\u4e2d\u7269\u7406", "STEM"],
"high_school_chemistry":
["High School Chemistry", "\u9ad8\u4e2d\u5316\u5b66", "STEM"],
"high_school_biology": [
"High School Biology", "\u9ad8\u4e2d\u751f\u7269", "STEM"
],
"middle_school_mathematics": [
"Middle School Mathematics", "\u521d\u4e2d\u6570\u5b66", "STEM"
],
"middle_school_biology": [
"Middle School Biology", "\u521d\u4e2d\u751f\u7269", "STEM"
],
"middle_school_physics": [
"Middle School Physics", "\u521d\u4e2d\u7269\u7406", "STEM"
],
"middle_school_chemistry": [
"Middle School Chemistry", "\u521d\u4e2d\u5316\u5b66", "STEM"
],
"veterinary_medicine": [
"Veterinary Medicine", "\u517d\u533b\u5b66", "STEM"
],
"college_economics": [
"College Economics", "\u5927\u5b66\u7ecf\u6d4e\u5b66", "Social Science"
],
"business_administration": [
"Business Administration", "\u5de5\u5546\u7ba1\u7406", "Social Science"
],
"marxism": [
"Marxism", "\u9a6c\u514b\u601d\u4e3b\u4e49\u57fa\u672c\u539f\u7406",
"Social Science"
],
"mao_zedong_thought": [
"Mao Zedong Thought",
"\u6bdb\u6cfd\u4e1c\u601d\u60f3\u548c\u4e2d\u56fd\u7279\u8272\u793e\u4f1a\u4e3b\u4e49\u7406\u8bba\u4f53\u7cfb\u6982\u8bba",
"Social Science"
],
"education_science": [
"Education Science", "\u6559\u80b2\u5b66", "Social Science"
],
"teacher_qualification": [
"Teacher Qualification", "\u6559\u5e08\u8d44\u683c", "Social Science"
],
"high_school_politics": [
"High School Politics", "\u9ad8\u4e2d\u653f\u6cbb", "Social Science"
],
"high_school_geography": [
"High School Geography", "\u9ad8\u4e2d\u5730\u7406", "Social Science"
],
"middle_school_politics": [
"Middle School Politics", "\u521d\u4e2d\u653f\u6cbb", "Social Science"
],
"middle_school_geography": [
"Middle School Geography", "\u521d\u4e2d\u5730\u7406", "Social Science"
],
"modern_chinese_history":
["Modern Chinese History", "\u8fd1\u4ee3\u53f2\u7eb2\u8981", "Humanities"],
"ideological_and_moral_cultivation": [
"Ideological and Moral Cultivation",
"\u601d\u60f3\u9053\u5fb7\u4fee\u517b\u4e0e\u6cd5\u5f8b\u57fa\u7840",
"Humanities"
],
"logic": ["Logic", "\u903b\u8f91\u5b66", "Humanities"],
"law": ["Law", "\u6cd5\u5b66", "Humanities"],
"chinese_language_and_literature": [
"Chinese Language and Literature",
"\u4e2d\u56fd\u8bed\u8a00\u6587\u5b66", "Humanities"
],
"art_studies": ["Art Studies", "\u827a\u672f\u5b66", "Humanities"],
"professional_tour_guide": [
"Professional Tour Guide", "\u5bfc\u6e38\u8d44\u683c", "Humanities"
],
"legal_professional": [
"Legal Professional", "\u6cd5\u5f8b\u804c\u4e1a\u8d44\u683c",
"Humanities"
],
"high_school_chinese": [
"High School Chinese", "\u9ad8\u4e2d\u8bed\u6587", "Humanities"
],
"high_school_history": [
"High School History", "\u9ad8\u4e2d\u5386\u53f2", "Humanities"
],
"middle_school_history": [
"Middle School History", "\u521d\u4e2d\u5386\u53f2", "Humanities"
],
"civil_servant": ["Civil Servant", "\u516c\u52a1\u5458", "Other"],
"sports_science": ["Sports Science", "\u4f53\u80b2\u5b66", "Other"],
"plant_protection": [
"Plant Protection", "\u690d\u7269\u4fdd\u62a4", "Other"
],
"basic_medicine": ["Basic Medicine", "\u57fa\u7840\u533b\u5b66", "Other"],
"clinical_medicine": [
"Clinical Medicine", "\u4e34\u5e8a\u533b\u5b66", "Other"
],
"urban_and_rural_planner": [
"Urban and Rural Planner",
"\u6ce8\u518c\u57ce\u4e61\u89c4\u5212\u5e08", "Other"
],
"accountant": ["Accountant", "\u6ce8\u518c\u4f1a\u8ba1\u5e08", "Other"],
"fire_engineer": [
"Fire Engineer", "\u6ce8\u518c\u6d88\u9632\u5de5\u7a0b\u5e08", "Other"
],
"environmental_impact_assessment_engineer": [
"Environmental Impact Assessment Engineer",
"\u73af\u5883\u5f71\u54cd\u8bc4\u4ef7\u5de5\u7a0b\u5e08", "Other"
],
"tax_accountant": ["Tax Accountant", "\u7a0e\u52a1\u5e08", "Other"],
"physician": ["Physician", "\u533b\u5e08\u8d44\u683c", "Other"]
'computer_network': ['Computer Network', '计算机网络', 'STEM'],
'operating_system': ['Operating System', '操作系统', 'STEM'],
'computer_architecture': ['Computer Architecture', '计算机组成', 'STEM'],
'college_programming': ['College Programming', '大学编程', 'STEM'],
'college_physics': ['College Physics', '大学物理', 'STEM'],
'college_chemistry': ['College Chemistry', '大学化学', 'STEM'],
'advanced_mathematics': ['Advanced Mathematics', '高等数学', 'STEM'],
'probability_and_statistics': ['Probability and Statistics', '概率统计', 'STEM'],
'discrete_mathematics': ['Discrete Mathematics', '离散数学', 'STEM'],
'electrical_engineer': ['Electrical Engineer', '注册电气工程师', 'STEM'],
'metrology_engineer': ['Metrology Engineer', '注册计量师', 'STEM'],
'high_school_mathematics': ['High School Mathematics', '高中数学', 'STEM'],
'high_school_physics': ['High School Physics', '高中物理', 'STEM'],
'high_school_chemistry': ['High School Chemistry', '高中化学', 'STEM'],
'high_school_biology': ['High School Biology', '高中生物', 'STEM'],
'middle_school_mathematics': ['Middle School Mathematics', '初中数学', 'STEM'],
'middle_school_biology': ['Middle School Biology', '初中生物', 'STEM'],
'middle_school_physics': ['Middle School Physics', '初中物理', 'STEM'],
'middle_school_chemistry': ['Middle School Chemistry', '初中化学', 'STEM'],
'veterinary_medicine': ['Veterinary Medicine', '兽医学', 'STEM'],
'college_economics': ['College Economics', '大学经济学', 'Social Science'],
'business_administration': ['Business Administration', '工商管理', 'Social Science'],
'marxism': ['Marxism', '马克思主义基本原理', 'Social Science'],
'mao_zedong_thought': ['Mao Zedong Thought', '毛泽东思想和中国特色社会主义理论体系概论', 'Social Science'],
'education_science': ['Education Science', '教育学', 'Social Science'],
'teacher_qualification': ['Teacher Qualification', '教师资格', 'Social Science'],
'high_school_politics': ['High School Politics', '高中政治', 'Social Science'],
'high_school_geography': ['High School Geography', '高中地理', 'Social Science'],
'middle_school_politics': ['Middle School Politics', '初中政治', 'Social Science'],
'middle_school_geography': ['Middle School Geography', '初中地理', 'Social Science'],
'modern_chinese_history': ['Modern Chinese History', '近代史纲要', 'Humanities'],
'ideological_and_moral_cultivation': ['Ideological and Moral Cultivation', '思想道德修养与法律基础', 'Humanities'],
'logic': ['Logic', '逻辑学', 'Humanities'],
'law': ['Law', '法学', 'Humanities'],
'chinese_language_and_literature': ['Chinese Language and Literature', '中国语言文学', 'Humanities'],
'art_studies': ['Art Studies', '艺术学', 'Humanities'],
'professional_tour_guide': ['Professional Tour Guide', '导游资格', 'Humanities'],
'legal_professional': ['Legal Professional', '法律职业资格', 'Humanities'],
'high_school_chinese': ['High School Chinese', '高中语文', 'Humanities'],
'high_school_history': ['High School History', '高中历史', 'Humanities'],
'middle_school_history': ['Middle School History', '初中历史', 'Humanities'],
'civil_servant': ['Civil Servant', '公务员', 'Other'],
'sports_science': ['Sports Science', '体育学', 'Other'],
'plant_protection': ['Plant Protection', '植物保护', 'Other'],
'basic_medicine': ['Basic Medicine', '基础医学', 'Other'],
'clinical_medicine': ['Clinical Medicine', '临床医学', 'Other'],
'urban_and_rural_planner': ['Urban and Rural Planner', '注册城乡规划师', 'Other'],
'accountant': ['Accountant', '注册会计师', 'Other'],
'fire_engineer': ['Fire Engineer', '注册消防工程师', 'Other'],
'environmental_impact_assessment_engineer': ['Environmental Impact Assessment Engineer', '环境影响评价工程师', 'Other'],
'tax_accountant': ['Tax Accountant', '税务师', 'Other'],
'physician': ['Physician', '医师资格', 'Other'],
}
ceval_all_sets = list(ceval_subject_mapping.keys())

View File

@@ -0,0 +1,105 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever, ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CEvalDataset
from opencompass.utils.text_postprocessors import first_option_postprocess
ceval_subject_mapping = {
'computer_network': ['Computer Network', '计算机网络', 'STEM'],
'operating_system': ['Operating System', '操作系统', 'STEM'],
'computer_architecture': ['Computer Architecture', '计算机组成', 'STEM'],
'college_programming': ['College Programming', '大学编程', 'STEM'],
'college_physics': ['College Physics', '大学物理', 'STEM'],
'college_chemistry': ['College Chemistry', '大学化学', 'STEM'],
'advanced_mathematics': ['Advanced Mathematics', '高等数学', 'STEM'],
'probability_and_statistics': ['Probability and Statistics', '概率统计', 'STEM'],
'discrete_mathematics': ['Discrete Mathematics', '离散数学', 'STEM'],
'electrical_engineer': ['Electrical Engineer', '注册电气工程师', 'STEM'],
'metrology_engineer': ['Metrology Engineer', '注册计量师', 'STEM'],
'high_school_mathematics': ['High School Mathematics', '高中数学', 'STEM'],
'high_school_physics': ['High School Physics', '高中物理', 'STEM'],
'high_school_chemistry': ['High School Chemistry', '高中化学', 'STEM'],
'high_school_biology': ['High School Biology', '高中生物', 'STEM'],
'middle_school_mathematics': ['Middle School Mathematics', '初中数学', 'STEM'],
'middle_school_biology': ['Middle School Biology', '初中生物', 'STEM'],
'middle_school_physics': ['Middle School Physics', '初中物理', 'STEM'],
'middle_school_chemistry': ['Middle School Chemistry', '初中化学', 'STEM'],
'veterinary_medicine': ['Veterinary Medicine', '兽医学', 'STEM'],
'college_economics': ['College Economics', '大学经济学', 'Social Science'],
'business_administration': ['Business Administration', '工商管理', 'Social Science'],
'marxism': ['Marxism', '马克思主义基本原理', 'Social Science'],
'mao_zedong_thought': ['Mao Zedong Thought', '毛泽东思想和中国特色社会主义理论体系概论', 'Social Science'],
'education_science': ['Education Science', '教育学', 'Social Science'],
'teacher_qualification': ['Teacher Qualification', '教师资格', 'Social Science'],
'high_school_politics': ['High School Politics', '高中政治', 'Social Science'],
'high_school_geography': ['High School Geography', '高中地理', 'Social Science'],
'middle_school_politics': ['Middle School Politics', '初中政治', 'Social Science'],
'middle_school_geography': ['Middle School Geography', '初中地理', 'Social Science'],
'modern_chinese_history': ['Modern Chinese History', '近代史纲要', 'Humanities'],
'ideological_and_moral_cultivation': ['Ideological and Moral Cultivation', '思想道德修养与法律基础', 'Humanities'],
'logic': ['Logic', '逻辑学', 'Humanities'],
'law': ['Law', '法学', 'Humanities'],
'chinese_language_and_literature': ['Chinese Language and Literature', '中国语言文学', 'Humanities'],
'art_studies': ['Art Studies', '艺术学', 'Humanities'],
'professional_tour_guide': ['Professional Tour Guide', '导游资格', 'Humanities'],
'legal_professional': ['Legal Professional', '法律职业资格', 'Humanities'],
'high_school_chinese': ['High School Chinese', '高中语文', 'Humanities'],
'high_school_history': ['High School History', '高中历史', 'Humanities'],
'middle_school_history': ['Middle School History', '初中历史', 'Humanities'],
'civil_servant': ['Civil Servant', '公务员', 'Other'],
'sports_science': ['Sports Science', '体育学', 'Other'],
'plant_protection': ['Plant Protection', '植物保护', 'Other'],
'basic_medicine': ['Basic Medicine', '基础医学', 'Other'],
'clinical_medicine': ['Clinical Medicine', '临床医学', 'Other'],
'urban_and_rural_planner': ['Urban and Rural Planner', '注册城乡规划师', 'Other'],
'accountant': ['Accountant', '注册会计师', 'Other'],
'fire_engineer': ['Fire Engineer', '注册消防工程师', 'Other'],
'environmental_impact_assessment_engineer': ['Environmental Impact Assessment Engineer', '环境影响评价工程师', 'Other'],
'tax_accountant': ['Tax Accountant', '税务师', 'Other'],
'physician': ['Physician', '医师资格', 'Other'],
}
ceval_all_sets = list(ceval_subject_mapping.keys())
ceval_datasets = []
for _split in ["val"]:
for _name in ceval_all_sets:
_ch_name = ceval_subject_mapping[_name][1]
ceval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(
begin="</E>",
round=[
dict(
role="HUMAN",
prompt=
f"以下是中国关于{_ch_name}考试的单项选择题,请选出其中的正确答案。\n{{question}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n让我们一步一步思考。答案: "
),
dict(role="BOT", prompt="{answer}"),
]),
ice_token="</E>",
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=256),
)
ceval_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'))
ceval_datasets.append(
dict(
type=CEvalDataset,
path="./data/ceval/formal_ceval",
name=_name,
abbr="ceval-" + _name if _split == "val" else "ceval-test-" +
_name,
reader_cfg=dict(
input_columns=["question", "A", "B", "C", "D"],
output_column="answer",
train_split="dev",
test_split=_split),
infer_cfg=ceval_infer_cfg,
eval_cfg=ceval_eval_cfg,
))

View File

@@ -0,0 +1,41 @@
# Use FixKRetriever to avoid the hang caused by the HuggingFace dataset loading
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import commonsenseqaDataset
commonsenseqa_reader_cfg = dict(
input_columns=['question', 'A', 'B', 'C', 'D', 'E'],
output_column='answerKey',
test_split='validation')
_ice_template = dict(
type=PromptTemplate,
template={
ans: dict(
begin='</E>',
round=[
dict(role="HUMAN", prompt="Question: {question}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nE. {E}\nAnswer: "),
dict(role="BOT", prompt=f"{ans}"),
])
for ans in ['A', 'B', 'C', 'D', 'E']
},
ice_token='</E>')
commonsenseqa_infer_cfg = dict(
ice_template=_ice_template,
retriever=dict(type=FixKRetriever, fix_id_list=[0, 1, 2, 3, 4, 5, 6, 7]),
inferencer=dict(type=PPLInferencer))
commonsenseqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
commonsenseqa_datasets = [
dict(
abbr='commonsense_qa',
type=commonsenseqaDataset,
path='./data/commonsenseqa',
reader_cfg=commonsenseqa_reader_cfg,
infer_cfg=commonsenseqa_infer_cfg,
eval_cfg=commonsenseqa_eval_cfg)
]
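Because the template above maps every answer label to its own filled-in prompt, the PPLInferencer scores one completion per label and keeps the most probable one. A minimal illustrative sketch of that selection step (not OpenCompass's actual implementation):

def pick_answer(ppl_by_label: dict) -> str:
    # The label whose completed prompt has the lowest perplexity wins.
    return min(ppl_by_label, key=ppl_by_label.get)

assert pick_answer({'A': 5.1, 'B': 3.2, 'C': 6.0, 'D': 4.4, 'E': 7.3}) == 'B'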

View File

@@ -37,7 +37,7 @@ ds1000_datasets = [
dict(
abbr=f"ds1000_{lib}",
type=DS1000Dataset,
path="ds1000_data/",
path="./data/ds1000_data/",
libs=f"{lib}",
reader_cfg=ds1000_reader_cfg,
infer_cfg=ds1000_infer_cfg,
@@ -55,7 +55,7 @@ ds1000_datasets.append(
dict(
abbr="ds1000_Matplotlib",
type=DS1000Dataset,
path="ds1000_data/",
path="./data/ds1000_data/",
libs="Matplotlib",
reader_cfg=ds1000_reader_cfg,
infer_cfg=ds1000_infer_cfg,

View File

@@ -0,0 +1,67 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import DS1000Dataset, DS1000ServiceEvaluator
ds1000_reader_cfg = dict(
input_columns=["prompt"],
output_column="test_column",
train_split='test',
test_split='test')
ds1000_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt="{prompt}",
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
ds1000_eval_cfg_dict = {
lib: dict(
evaluator=dict(
type=DS1000ServiceEvaluator,
lib=lib,
ip_address="localhost",  # replace with your code_eval_server IP address and port
port=5000
),
pred_role="BOT")
for lib in [
'Pandas',
'Numpy',
'Tensorflow',
'Scipy',
'Sklearn',
'Pytorch',
'Matplotlib',
]
}
# The DS-1000 dataset can be downloaded from
# https://github.com/HKUNLP/DS-1000/blob/main/ds1000_data.zip
ds1000_datasets = [
dict(
abbr=f"ds1000_{lib}",
type=DS1000Dataset,
path="./data/ds1000_data/",
libs=f"{lib}",
reader_cfg=ds1000_reader_cfg,
infer_cfg=ds1000_infer_cfg,
eval_cfg=ds1000_eval_cfg_dict[lib],
) for lib in [
'Pandas',
'Numpy',
'Tensorflow',
'Scipy',
'Sklearn',
'Pytorch',
'Matplotlib',
]
]
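The DS1000ServiceEvaluator configured above (its implementation is diffed later in this commit) uploads the predictions as a JSON file to ip_address:port/evaluate via curl. An equivalent standalone call, assuming the code eval service from the linked guide is running and 'predictions.json' is a placeholder file name:

import subprocess

result = subprocess.run(
    ['curl', '-X', 'POST', '-F', 'file=@predictions.json',
     'localhost:5000/evaluate'],
    timeout=180, capture_output=True)
print(result.stdout.decode('utf-8'))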

View File

@@ -0,0 +1,33 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import hellaswagDataset_V2
hellaswag_reader_cfg = dict(
input_columns=['query', 'A', 'B', 'C', 'D'],
output_column='label')
hellaswag_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
ans: dict(round=[
dict(role="HUMAN", prompt="{ctx}\nQuestion: Which ending makes the most sense?\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer: "),
dict(role="BOT", prompt=f"{ans}"),
]) for ans in ['A', 'B', 'C', 'D']
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
hellaswag_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
hellaswag_datasets = [
dict(
abbr='hellaswag',
type=hellaswagDataset_V2,
path='./data/hellaswag/hellaswag.jsonl',
reader_cfg=hellaswag_reader_cfg,
infer_cfg=hellaswag_infer_cfg,
eval_cfg=hellaswag_eval_cfg)
]

View File

@@ -0,0 +1,123 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever, ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import MMLUDataset
from opencompass.utils.text_postprocessors import first_option_postprocess
# None of the MMLU datasets on HuggingFace are parsed correctly, so we use our own dataset reader
# Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar
mmlu_reader_cfg = dict(
input_columns=["input", "A", "B", "C", "D"],
output_column="target",
train_split='dev')
mmlu_all_sets = [
"college_biology",
"college_chemistry",
"college_computer_science",
"college_mathematics",
"college_physics",
"electrical_engineering",
"astronomy",
"anatomy",
"abstract_algebra",
"machine_learning",
"clinical_knowledge",
"global_facts",
"management",
"nutrition",
"marketing",
"professional_accounting",
"high_school_geography",
"international_law",
"moral_scenarios",
"computer_security",
"high_school_microeconomics",
"professional_law",
"medical_genetics",
"professional_psychology",
"jurisprudence",
"world_religions",
"philosophy",
"virology",
"high_school_chemistry",
"public_relations",
"high_school_macroeconomics",
"human_sexuality",
"elementary_mathematics",
"high_school_physics",
"high_school_computer_science",
"high_school_european_history",
"business_ethics",
"moral_disputes",
"high_school_statistics",
"miscellaneous",
"formal_logic",
"high_school_government_and_politics",
"prehistory",
"security_studies",
"high_school_biology",
"logical_fallacies",
"high_school_world_history",
"professional_medicine",
"high_school_mathematics",
"college_medicine",
"high_school_us_history",
"sociology",
"econometrics",
"high_school_psychology",
"human_aging",
"us_foreign_policy",
"conceptual_physics",
]
mmlu_datasets = []
for _name in mmlu_all_sets:
_hint = f'There is a single choice question about {_name.replace("_", " ")}. Answer the question by replying A, B, C or D.'
mmlu_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
f"{_hint}\nQ: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nA: "
),
dict(role="BOT", prompt="{target}\n")
]),
),
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin="</E>",
round=[
dict(
role="HUMAN",
prompt=
f"{_hint}\nQ: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nLet's think step by step. A: "
),
],
),
ice_token="</E>",
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=256),
)
mmlu_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'))
mmlu_datasets.append(
dict(
abbr=f"lukaemon_mmlu_{_name}",
type=MMLUDataset,
path="./data/mmlu/",
name=_name,
reader_cfg=mmlu_reader_cfg,
infer_cfg=mmlu_infer_cfg,
eval_cfg=mmlu_eval_cfg,
))
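first_option_postprocess extracts the predicted option letter from the model's free-form chain-of-thought output before accuracy is computed. An illustrative stand-in for that behavior (not the actual OpenCompass implementation):

import re

def first_option(text: str, options: str = 'ABCD') -> str:
    # Return the first occurrence of any option letter in the generation.
    match = re.search(f'[{options}]', text)
    return match.group(0) if match else ''

assert first_option("Let's think step by step. The answer is B.") == 'B'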

View File

@@ -0,0 +1,24 @@
from opencompass.models import HuggingFaceCausalLM
models = [
dict(
type=HuggingFaceCausalLM,
abbr='bluelm-7b-base-hf',
path="vivo-ai/BlueLM-7B-Base",
tokenizer_path='vivo-ai/BlueLM-7B-Base',
model_kwargs=dict(
device_map='auto',
trust_remote_code=True,
),
tokenizer_kwargs=dict(
padding_side='left',
truncation_side='left',
trust_remote_code=True,
use_fast=False,
),
max_out_len=100,
max_seq_len=2048,
batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1),
)
]

View File

@@ -0,0 +1,24 @@
from opencompass.models import HuggingFaceCausalLM
models = [
dict(
type=HuggingFaceCausalLM,
abbr='bluelm-7b-base-32k-hf',
path="vivo-ai/BlueLM-7B-Base-32K",
tokenizer_path='vivo-ai/BlueLM-7B-Base-32K',
model_kwargs=dict(
device_map='auto',
trust_remote_code=True,
),
tokenizer_kwargs=dict(
padding_side='left',
truncation_side='left',
trust_remote_code=True,
use_fast=False,
),
max_out_len=100,
max_seq_len=4096,
batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1),
)
]

View File

@@ -0,0 +1,32 @@
from opencompass.models import HuggingFaceCausalLM
_meta_template = dict(
round=[
dict(role='HUMAN', begin='[|Human|]:'),
dict(role='BOT', begin='[|AI|]:', generate=True),
],
)
models = [
dict(
type=HuggingFaceCausalLM,
abbr='bluelm-7b-chat-hf',
path="vivo-ai/BlueLM-7B-Chat",
tokenizer_path='vivo-ai/BlueLM-7B-Chat',
model_kwargs=dict(
device_map='auto',
trust_remote_code=True,
),
tokenizer_kwargs=dict(
padding_side='left',
truncation_side='left',
trust_remote_code=True,
use_fast=False,
),
meta_template=_meta_template,
max_out_len=100,
max_seq_len=2048,
batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1),
)
]
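The _meta_template determines how dialogue rounds are serialized into a single prompt string, with the generate=True BOT turn left open for the model to complete. A rough illustrative rendering under the template above (not OpenCompass's exact serializer):

def render(rounds):
    tags = {'HUMAN': '[|Human|]:', 'BOT': '[|AI|]:'}
    # Each turn is prefixed with its role tag; the final BOT turn stays empty.
    return ''.join(tags[role] + text for role, text in rounds)

print(render([('HUMAN', '你好'), ('BOT', '')]))  # -> [|Human|]:你好[|AI|]: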

View File

@@ -0,0 +1,32 @@
from opencompass.models import HuggingFaceCausalLM
_meta_template = dict(
round=[
dict(role='HUMAN', begin='[|Human|]:'),
dict(role='BOT', begin='[|AI|]:', generate=True),
],
)
models = [
dict(
type=HuggingFaceCausalLM,
abbr='bluelm-7b-chat-32k-hf',
path="vivo-ai/BlueLM-7B-Chat-32K",
tokenizer_path='vivo-ai/BlueLM-7B-Chat-32K',
model_kwargs=dict(
device_map='auto',
trust_remote_code=True,
),
tokenizer_kwargs=dict(
padding_side='left',
truncation_side='left',
trust_remote_code=True,
use_fast=False,
),
meta_template=_meta_template,
max_out_len=100,
max_seq_len=4096,
batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1),
)
]

View File

@@ -0,0 +1,33 @@
from opencompass.models import HuggingFaceCausalLM
_meta_template = dict(
round=[
dict(role='HUMAN', begin='', end=''),
dict(role='BOT', begin='', end='\n\n', generate=True),
],
)
models = [
dict(
abbr='nanbeige-16b-base-hf',
type=HuggingFaceCausalLM,
path='Nanbeige/Nanbeige-16B-Base',
tokenizer_path='Nanbeige/Nanbeige-16B-Base',
model_kwargs=dict(
device_map='auto',
trust_remote_code=True,
torch_dtype='auto',
),
tokenizer_kwargs=dict(
padding_side='right',
truncation_side='left',
trust_remote_code=True
),
meta_template=_meta_template,
batch_padding=False,
max_out_len=1024,
max_seq_len=4096,
batch_size=1,
run_cfg=dict(num_gpus=1, num_procs=1),
)
]

View File

@@ -0,0 +1,34 @@
from opencompass.models import HuggingFaceCausalLM
_meta_template = dict(
round=[
dict(role='HUMAN', begin='', end=''),
dict(role='BOT', begin='', end='\n\n', generate=True),
],
)
models = [
dict(
type=HuggingFaceCausalLM,
abbr='nanbeige-16b-base-32k-hf',
path="Nanbeige/Nanbeige-16B-Base-32K",
tokenizer_path='Nanbeige/Nanbeige-16B-Base-32K',
model_kwargs=dict(
device_map='auto',
trust_remote_code=True,
torch_dtype='auto',
),
tokenizer_kwargs=dict(
padding_side='right',
truncation_side='left',
trust_remote_code=True,
use_fast=False,
),
meta_template=_meta_template,
batch_padding=False,
max_out_len=1024,
max_seq_len=8192,
batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1),
)
]

View File

@@ -0,0 +1,34 @@
from opencompass.models import HuggingFaceCausalLM
_meta_template = dict(
round=[
dict(role='HUMAN', begin='### Human: \n', end='\n\n'),
dict(role='BOT', begin='### Assistant: ', end='</s>', generate=True),
],
)
models = [
dict(
type=HuggingFaceCausalLM,
abbr='nanbeige-16b-chat-hf',
path="Nanbeige/Nanbeige-16B-Chat",
tokenizer_path='Nanbeige/Nanbeige-16B-Chat',
model_kwargs=dict(
device_map='auto',
trust_remote_code=True,
torch_dtype='auto',
),
tokenizer_kwargs=dict(
padding_side='right',
truncation_side='left',
trust_remote_code=True,
use_fast=False,
),
meta_template=_meta_template,
batch_padding=False,
max_out_len=1024,
max_seq_len=4096,
batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1),
)
]

View File

@@ -0,0 +1,34 @@
from opencompass.models import HuggingFaceCausalLM
_meta_template = dict(
round=[
dict(role='HUMAN', begin='### Human: \n', end='\n\n'),
dict(role='BOT', begin='### Assistant: ', end='</s>', generate=True),
],
)
models = [
dict(
type=HuggingFaceCausalLM,
abbr='nanbeige-16b-chat-32k-hf',
path="Nanbeige/Nanbeige-16B-Chat-32K",
tokenizer_path='Nanbeige/Nanbeige-16B-Chat-32K',
model_kwargs=dict(
device_map='auto',
trust_remote_code=True,
torch_dtype='auto',
),
tokenizer_kwargs=dict(
padding_side='right',
truncation_side='left',
trust_remote_code=True,
use_fast=False,
),
meta_template=_meta_template,
batch_padding=False,
max_out_len=1024,
max_seq_len=8192,
batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1),
)
]

View File

@@ -0,0 +1,33 @@
from opencompass.models import HuggingFaceCausalLM
_meta_template = dict(
round=[
dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'),
dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True),
],
eos_token_id=2
)
models = [
dict(
abbr='dolphin-2.2.1-mistral-7b-hf',
type=HuggingFaceCausalLM,
path='ehartford/dolphin-2.2.1-mistral-7b',
tokenizer_path='ehartford/dolphin-2.2.1-mistral-7b',
model_kwargs=dict(
device_map='auto',
trust_remote_code=True,
),
tokenizer_kwargs=dict(
padding_side='left',
truncation_side='left',
trust_remote_code=True,
),
meta_template=_meta_template,
max_out_len=100,
max_seq_len=2048,
batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1),
)
]

View File

@@ -0,0 +1,33 @@
from opencompass.models import HuggingFaceCausalLM
_meta_template = dict(
round=[
dict(role="HUMAN", begin='### User:\n', end='\n'),
dict(role="BOT", begin="### Assistant:\n", generate=True),
],
eos_token_id=2
)
models = [
dict(
abbr='fashiongpt-70b-v11-hf',
type=HuggingFaceCausalLM,
path='ICBU-NPU/FashionGPT-70B-V1.1',
tokenizer_path='ICBU-NPU/FashionGPT-70B-V1.1',
model_kwargs=dict(
device_map='auto',
trust_remote_code=True,
),
tokenizer_kwargs=dict(
padding_side='left',
truncation_side='left',
trust_remote_code=True,
),
meta_template=_meta_template,
max_out_len=100,
max_seq_len=2048,
batch_size=8,
run_cfg=dict(num_gpus=8, num_procs=1),
)
]

View File

@@ -0,0 +1,34 @@
from opencompass.models import HuggingFaceCausalLM
_meta_template = dict(
begin='<|startoftext|>',
round=[
dict(role="HUMAN", begin='Human: ', end='\n\n'),
dict(role="BOT", begin="Assistant: <|endoftext|>", end='<|endoftext|>', generate=True),
],
eos_token_id=2
)
models = [
dict(
abbr='orionstar-yi-34b-chat-hf',
type=HuggingFaceCausalLM,
path='OrionStarAI/OrionStar-Yi-34B-Chat',
tokenizer_path='OrionStarAI/OrionStar-Yi-34B-Chat',
model_kwargs=dict(
device_map='auto',
trust_remote_code=True,
),
tokenizer_kwargs=dict(
padding_side='left',
truncation_side='left',
trust_remote_code=True,
),
meta_template=_meta_template,
max_out_len=100,
max_seq_len=2048,
batch_size=8,
run_cfg=dict(num_gpus=4, num_procs=1),
)
]

View File

@@ -0,0 +1,5 @@
ds1000_summary_groups = []
_ds1000_all = ['Pandas', 'Numpy', 'Tensorflow', 'Scipy', 'Sklearn', 'Pytorch', 'Matplotlib']
_ds1000_all = ['ds1000_' + d for d in _ds1000_all]
ds1000_summary_groups.append({'name': 'ds1000', 'subsets': _ds1000_all})

View File

@@ -1,15 +1,19 @@
import configparser
import importlib
import json
import os
import os.path as osp
import pickle
import re
import shutil
import signal
import subprocess
import sys
import tempfile
import threading
from concurrent.futures import ProcessPoolExecutor
from pathlib import Path
from shutil import copyfile
from subprocess import PIPE, Popen
from typing import Optional, Union
@@ -20,6 +24,11 @@ from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS
from .base import BaseDataset
_LIBRARY_NAME_LIST = [
'Pandas', 'Numpy', 'Tensorflow', 'Scipy', 'Sklearn', 'Pytorch',
'Matplotlib'
]
@LOAD_DATASET.register_module()
class DS1000Dataset(BaseDataset):
@@ -323,3 +332,98 @@ def import_source_file(fname, modname):
except FileNotFoundError as e:
raise ImportError(f'{e.strerror}: {fname}') from e
return module
class DS1000ServiceEvaluator(BaseEvaluator):
"""Evaluator for ds1000 eval by using a service.
Before you use this Evaluator, launch a code eval service according to:
https://opencompass.readthedocs.io/en/latest/advanced_guides/code_eval_service.html
Args:
lib (str): The library to be evaluated.
ip_address (str): The IP address of the DS1000 code evaluation service.
Defaults to 'localhost'.
port (int): The port of the DS1000 code evaluation service.
Defaults to 5000.
timeout (int): Maximum wait time when accessing the service.
Defaults to 180.
"""
def __init__(self,
lib: str,
ip_address='localhost',
port=5000,
timeout=180) -> None:
assert lib in _LIBRARY_NAME_LIST, (
f' lib must be in {_LIBRARY_NAME_LIST}')
self.lib = lib
self.ip_address = ip_address
self.port = port
self.timeout = timeout
super().__init__()
def score(self, predictions, references):
processed_predictions = {}
assert len(predictions) == len(references)
for i, (pred, gold) in enumerate(zip(predictions, references)):
processed_predictions[str(i)] = {'prediction': pred, 'gold': gold}
with tempfile.TemporaryDirectory() as tmp_dir:
tmp_out_path = osp.join(tmp_dir, f'ds1000_{self.lib}.json')
with open(tmp_out_path, 'w', encoding='utf-8') as json_file:
json.dump(processed_predictions,
json_file,
indent=4,
ensure_ascii=False)
succeed, output = self._code_eval_service(file_path=tmp_out_path)
if succeed:
if isinstance(output, str):
return json.loads(output)
elif isinstance(output, dict):
return output
else:
                result_file_path = osp.join('outputs',
                                            f'ds1000_{self.lib}.json')
                copyfile(tmp_out_path, result_file_path)
                ref_url = 'https://opencompass.readthedocs.io/en/latest/advanced_guides/code_eval_service.html'  # noqa
                raise Exception(
                    'Failed to call the code eval service in '
                    '`DS1000ServiceEvaluator`. The results have been saved '
                    f"to '{result_file_path}'. Check that your code eval "
                    'service is launched and reachable from this machine; '
                    'you can also get results directly with a `curl` '
                    f'command, see {ref_url}.'
                    f'\nError information: {output}')
def _code_eval_service(self, file_path: str) -> tuple:
"""Access the code eval service.
Args:
file_path (str): The file path to the file to be evaluated.
Returns:
tuple[bool, str]: Whether the access is successful and the output.
"""
exec_result = subprocess.run([
'curl', '-X', 'POST', '-F', f'file=@{file_path}',
f'{self.ip_address}:{self.port}/evaluate'
],
timeout=self.timeout,
capture_output=True)
if exec_result.returncode == 0 and re.match(
"\"{.*:.*}\"", exec_result.stdout.decode('utf-8')):
return True, json.loads(exec_result.stdout.decode('utf-8'))
else:
if exec_result.stderr:
try:
err = exec_result.stderr.decode()
except Exception:
err = exec_result.stderr
else:
try:
err = exec_result.stdout.decode()
except Exception:
err = exec_result.stdout
return False, err
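A minimal sketch of plugging the evaluator into a dataset eval config (hypothetical config; assumes `DS1000ServiceEvaluator` is exported from `opencompass.datasets`):

from opencompass.datasets import DS1000ServiceEvaluator

ds1000_eval_cfg = dict(
    evaluator=dict(
        type=DS1000ServiceEvaluator,
        lib='Pandas',            # must be one of _LIBRARY_NAME_LIST
        ip_address='localhost',  # host running the code eval service
        port=5000,
        timeout=180,
    ),
)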


@@ -93,6 +93,7 @@ def humaneval_postprocess(text: str) -> str:
if def_idx != -1:
text = text[max(text.find('\n', def_idx) + 1, 0):]
text = text.split('\n\n')[0]
text = text.lstrip('\n')
if text.strip().startswith('def'):
text = '\n'.join(text.split('\n')[1:])
if not text.startswith(' '):
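The new `lstrip` call guards against completions that open with a bare newline, which would otherwise defeat the indentation check at the end of this hunk; a small walk-through with a made-up completion:

text = '\n    return x + 1'
text = text.split('\n\n')[0]  # unchanged: no blank line inside
text = text.lstrip('\n')      # '    return x + 1'
# Without the lstrip, text.startswith(' ') would be False here even though
# the completion really is an indented function body.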


@@ -127,7 +127,9 @@ class MBPPEvaluator(BaseEvaluator):
predictions = [self._process_answer(pred) for pred in predictions]
result = {'pass': 0, 'timeout': 0, 'failed': 0, 'wrong_answer': 0}
for test_case, pred in zip(references, predictions):
details = {}
for index, (test_case, pred) in enumerate(zip(references,
predictions)):
programs = self._process_test(test_case, pred)
try:
# Add exec globals to prevent the exec to raise
@@ -136,15 +138,18 @@ class MBPPEvaluator(BaseEvaluator):
with swallow_io():
with time_limit(2):
exec(programs, exec_globals)
result['pass'] += 1
r = 'pass'
except TimeOutException:
result['timeout'] += 1
r = 'timeout'
except AssertionError:
result['wrong_answer'] += 1
r = 'wrong_answer'
except BaseException:
result['failed'] += 1
r = 'failed'
result[r] += 1
details[str(index)] = {'programs': programs, 'result': r}
result['score'] = result['pass'] / len(predictions) * 100
result['details'] = details
return result
def _process_answer(self, text):
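After this change the evaluator reports a per-sample breakdown alongside the aggregate counters; the dict returned by `score` looks roughly like this (values are made up):

# Illustrative shape of MBPPEvaluator.score's return value.
result = {
    'pass': 7, 'timeout': 1, 'failed': 1, 'wrong_answer': 1,
    'score': 70.0,  # pass / len(predictions) * 100
    'details': {
        '0': {'programs': '<assembled test program>', 'result': 'pass'},
        # ... one entry per (test_case, prediction) pair
    },
}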


@@ -147,26 +147,26 @@ class DefaultSummarizer:
if all(isinstance(dataset_abbr, (list, tuple)) for dataset_abbr in sg['subsets']):
group_metrics = [default_metric]
for dataset_abbr, metric in sg['subsets']:
scores.setdefault(default_metric, []).append(parsed_results[model_abbr][dataset_abbr][metric])
scores.setdefault(default_metric, {})[dataset_abbr] = parsed_results[model_abbr][dataset_abbr][metric]
eval_modes.append(dataset_eval_mode.get(dataset_abbr, 'unknown'))
else:
group_metrics = list(functools.reduce(lambda a, b: a & b, [set(dataset_metrics[dataset_abbr]) for dataset_abbr in sg['subsets']]))
if len(group_metrics) > 1:
for metric in group_metrics:
for dataset_abbr in sg['subsets']:
scores.setdefault(metric, []).append(parsed_results[model_abbr][dataset_abbr][metric])
scores.setdefault(metric, {})[dataset_abbr] = parsed_results[model_abbr][dataset_abbr][metric]
eval_modes.append(dataset_eval_mode.get(sg['subsets'][0], 'unknown'))
else:
group_metrics = [default_metric]
for dataset_abbr in sg['subsets']:
metric = dataset_metrics[dataset_abbr][0]
scores.setdefault(default_metric, []).append(parsed_results[model_abbr][dataset_abbr][metric])
scores.setdefault(default_metric, {})[dataset_abbr] = parsed_results[model_abbr][dataset_abbr][metric]
eval_modes.append(dataset_eval_mode.get(dataset_abbr, 'unknown'))
result = {}
for metric in scores:
if default_metric == 'standard_deviation':
avg = sum(scores[metric]) / len(scores[metric])
avg = sum(scores[metric].values()) / len(scores[metric])
                variance = sum((v - avg) ** 2 for v in scores[metric].values()) / len(scores[metric])
scores[metric] = result[metric] = math.sqrt(variance)
else:
@@ -174,7 +174,7 @@ class DefaultSummarizer:
numerator = sum(scores[metric][k] * sg['weights'][k] for k in sg['weights'])
denominator = sum(sg['weights'].values())
else:
numerator = sum(scores[metric])
numerator = sum(scores[metric].values())
denominator = len(scores[metric])
scores[metric] = result[metric] = numerator / denominator
eval_modes = list(set(eval_modes))
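The list-to-dict switch for `scores[metric]` (keyed by dataset abbreviation) is what lets the weighted branch above look scores up by name; a standalone sketch of that arithmetic with made-up numbers:

scores = {'accuracy': {'ds1000_Pandas': 40.0, 'ds1000_Numpy': 60.0}}
weights = {'ds1000_Pandas': 3, 'ds1000_Numpy': 1}
numerator = sum(scores['accuracy'][k] * weights[k] for k in weights)  # 180.0
denominator = sum(weights.values())                                   # 4
print(numerator / denominator)                                        # 45.0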


@@ -51,19 +51,53 @@ def first_capital_postprocess(text: str) -> str:
def first_option_postprocess(text: str, options: str) -> str:
"""Find first valid option for text."""
# yapf: disable
# flake8: noqa: W605
patterns = [
f'[Tt]he answer is [{options}]',
f'[Tt]he correct answer\s?(?:option)?\s?is [{options}]', # noqa
f'答案(?:选项)?是(.*?)[{options}]',
f'答案(?:选项)?为(.*?)[{options}]',
f'答案(?:选项)?选(.*?)[{options}]',
f'选项[{options}]是?正确',
f'选项[{options}]为?正确',
f'固选(.*?)[{options}]',
f'答案应该是(.*?)[{options}]',
f'(\s|^)[{options}][\s。,\.$]', # noqa
        f'答案是?\s?([{options}])',
f'答案是?\s?:([{options}])',
f'答案应该?是\s?([{options}])',
f'答案应该?选\s?([{options}])',
f'答案为\s?([{options}])',
f'答案选\s?([{options}])',
f'选择?\s?([{options}])',
f'只有选?项?\s?([{options}])\s?是?对',
f'只有选?项?\s?([{options}])\s?是?错',
f'只有选?项?\s?([{options}])\s?不?正确',
f'只有选?项?\s?([{options}])\s?错误',
f'说法不?对选?项?的?是\s?([{options}])',
f'说法不?正确选?项?的?是\s?([{options}])',
f'说法错误选?项?的?是\s?([{options}])',
f'([{options}])\s?是正确的',
f'([{options}])\s?是正确答案',
f'选项\s?([{options}])\s?正确',
f'所以答\s?([{options}])',
f'1.\s?([{options}])[.。$]?$',
f'所以\s?([{options}][.。$]?$)',
f'所有\s?([{options}][.。$]?$)',
f'[\s:,]([{options}])[。,,\.]?$',
f'[\s,:][故即]([{options}])[。\.]?$',
f'[\s,:]因此([{options}])[。\.]?$',
f'[是为。]\s?([{options}])[。\.]?$',
f'因此\s?([{options}])[。\.]?$',
f'显然\s?([{options}])[。\.]?$',
f'1.\s?(.*?)$',
f'答案是\s?(\S+)(?:。|$)',
f'答案应该是\s?(\S+)(?:。|$)',
f'答案为\s?(\S+)(?:。|$)',
f'(\s|^)[{options}][\s。,:\.$]',
f'[Tt]he answer is ([{options}])',
f'[Tt]he answer is option ([{options}])',
f'[Tt]he correct answer is ([{options}])',
f'[Tt]he correct answer is option ([{options}])',
f'[Tt]he answer to the question is ([{options}])',
f'([{options}]):',
f'(^|\s)[{options}](\s|$)',
f'[{options}]',
]
# flake8: noqa
# yapf: enable
regexes = [re.compile(pattern) for pattern in patterns]
for regex in regexes:
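Illustrative behaviour of the extended pattern list (made-up inputs; assumes the function returns the matched option letter, or an empty string when nothing matches):

from opencompass.utils.text_postprocessors import first_option_postprocess

first_option_postprocess('The correct answer is option B.', 'ABCD')  # -> 'B'
first_option_postprocess('答案是 C', 'ABCD')                          # -> 'C'
first_option_postprocess('no option mentioned here', 'ABCD')          # -> ''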


@@ -84,20 +84,17 @@ def print_prompts(model_cfg, dataset_cfg, count=1):
if infer_cfg.inferencer.type == PPLInferencer:
labels = retriever.get_labels(ice_template=ice_template,
prompt_template=prompt_template)
ice = [
retriever.generate_ice(ice_idx_list[_idx],
ice_template=ice_template)
for _idx in range(len(ice_idx_list))
]
ice = retriever.generate_ice(ice_idx_list[idx],
ice_template=ice_template)
print('-' * 100)
print('ICE Template:')
print('-' * 100)
print(ice[0])
print(ice)
print('-' * 100)
for label in labels:
prompt = retriever.generate_label_prompt(
idx,
ice[idx],
ice,
label,
ice_template=ice_template,
prompt_template=prompt_template,
@@ -111,11 +108,11 @@ def print_prompts(model_cfg, dataset_cfg, count=1):
print(f'Truncating ice {num_ice} -> {num_ice - 1}',
f'Number of tokens: {prompt_token_num} -> ...')
ice_idx_list[idx] = ice_idx_list[idx][:-1]
ice[idx] = retriever.generate_ice(
ice_idx_list[idx], ice_template=ice_template)
ice = retriever.generate_ice(ice_idx_list[idx],
ice_template=ice_template)
prompt = retriever.generate_label_prompt(
idx,
ice[idx],
ice,
label,
ice_template=ice_template,
prompt_template=prompt_template)