OpenCompass/configs/datasets/glm/mmlu.py

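# OpenCompass dataset config: MMLU evaluated GLM-style as a multiple-choice
# task with five fixed in-context examples, scored by accuracy.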
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import GLMChoiceInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
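
# Reader: 'input' is the question, 'A'-'D' are the options, 'target' is the
# answer letter; in-context examples are drawn from the validation split.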
mmlu_reader_cfg = dict(
    input_columns=['input', 'A', 'B', 'C', 'D'],
    output_column='target',
    train_split='validation')
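
# Prompt template shared by every subject. 'template' stays None here and is
# filled in per subject in the loop at the bottom of this file; '</E>' marks
# where the retrieved in-context examples are inserted.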
mmlu_prompt_template = dict(
    type=PromptTemplate,
    template=None,
    column_token_map={
        'input': '</input>',
        'A': '</A>',
        'B': '</B>',
        'C': '</C>',
        'D': '</D>',
        'target': '</target>'
    },
    ice_token='</E>',
)
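
# Inference: the ice_template provides one rendering per answer letter, so
# each demo is formatted with its gold answer as "question, four options,
# Answer: (X) <option text>". FixKRetriever with fix_id_list=[0, 1, 2, 3, 4]
# always uses the first five validation examples as demos, and
# GLMChoiceInferencer has the model pick among the (A)-(D) choices.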
mmlu_infer_cfg = dict(
    ice_template=dict(
        type=PromptTemplate,
        template={
            target: '</input>\n(A) </A>\n(B) </B>\n(C) </C>\n(D) </D>\n'
            f'Answer: ({target}) </{target}>\n'
            for target in ['A', 'B', 'C', 'D']
        },
        column_token_map={
            'input': '</input>',
            'A': '</A>',
            'B': '</B>',
            'C': '</C>',
            'D': '</D>',
            'target': '</target>'
        }),
    prompt_template=mmlu_prompt_template,
    retriever=dict(type=FixKRetriever),
    inferencer=dict(type=GLMChoiceInferencer, fix_id_list=[0, 1, 2, 3, 4]))
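
# Predictions are scored as plain accuracy over the chosen letters.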
mmlu_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
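
# All MMLU subjects; every entry except college_biology is commented out, so
# only that subject is evaluated with this config as written.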
mmlu_all_sets = [
    "college_biology",
    # "college_chemistry",
    # "college_computer_science",
    # "college_mathematics",
    # "college_physics",
    # "electrical_engineering",
    # "astronomy",
    # "anatomy",
    # "abstract_algebra",
    # "machine_learning",
    # "clinical_knowledge",
    # "global_facts",
    # "management",
    # "nutrition",
    # "marketing",
    # "professional_accounting",
    # "high_school_geography",
    # "international_law",
    # "moral_scenarios",
    # "computer_security",
    # "high_school_microeconomics",
    # "professional_law",
    # "medical_genetics",
    # "professional_psychology",
    # "jurisprudence",
    # "world_religions",
    # "philosophy",
    # "virology",
    # "high_school_chemistry",
    # "public_relations",
    # "high_school_macroeconomics",
    # "human_sexuality",
    # "elementary_mathematics",
    # "high_school_physics",
    # "high_school_computer_science",
    # "high_school_european_history",
    # "business_ethics",
    # "moral_disputes",
    # "high_school_statistics",
    # "miscellaneous",
    # "formal_logic",
    # "high_school_government_and_politics",
    # "prehistory",
    # "security_studies",
    # "high_school_biology",
    # "logical_fallacies",
    # "high_school_world_history",
    # "professional_medicine",
    # "high_school_mathematics",
    # "college_medicine",
    # "high_school_us_history",
    # "sociology",
    # "econometrics",
    # "high_school_psychology",
    # "human_aging",
    # "us_foreign_policy",
    # "conceptual_physics",
]
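
# A smaller "key" subset of subjects, kept for convenience; it is not
# referenced elsewhere in this file.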
mmlu_key_sets = [
    'college_biology',
    'college_chemistry',
    'college_computer_science',
    'college_mathematics',
    'college_physics',
    'electrical_engineering',
    'astronomy',
    'anatomy',
    'abstract_algebra',
    'machine_learning',
    'clinical_knowledge',
    'global_facts',
    'management',
    'nutrition',
    'marketing',
    'professional_accounting',
]
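
# Build one HFDataset config per enabled subject. infer_cfg is shallow-copied
# so each dataset carries its own prompt_template: the SYSTEM turn names the
# subject, '</E>' receives the few-shot demos, and the HUMAN turn ends with
# "Answer: (" so the model completes the choice letter.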
mmlu_datasets = []
for name in mmlu_all_sets:
    mmlu_datasets.append(
        dict(
            type=HFDataset,
            path='lukaemon/mmlu',
            name=name,
            reader_cfg=mmlu_reader_cfg,
            infer_cfg=mmlu_infer_cfg.copy(),
            eval_cfg=mmlu_eval_cfg))
    mmlu_datasets[-1]['infer_cfg'][
        'prompt_template'] = mmlu_prompt_template.copy()
    mmlu_datasets[-1]['infer_cfg']['prompt_template']['template'] = dict(
        begin=[
            dict(
                role='SYSTEM',
                fallback_role='HUMAN',
                prompt=
                f'The following are multiple choice questions (with answers) about {name.replace("_", " ")}.'
            ),
            '</E>',
        ],
        round=[
            dict(
                role='HUMAN',
                prompt=
                '</input>\n(A) </A>\n(B) </B>\n(C) </C>\n(D) </D>\nAnswer: ('),
        ],
    )
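
# A typical way to pull these dataset configs into an OpenCompass run config
# (sketch only; the relative import path is illustrative and depends on where
# the run config lives):
#
#     from mmengine.config import read_base
#
#     with read_base():
#         from .datasets.glm.mmlu import mmlu_datasets
#
#     datasets = [*mmlu_datasets]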