# Dataset configuration for the CaLM tasks.
#
# Builds ``calm_datasets``: one dataset entry per (task, language) pair, where
# the language is selected by the prompt style ('basic' -> '_EN' data file,
# 'basic-CN' -> '_CN' data file).

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import CaLMDataset, CaLMEvaluator

# Maps each task file name (without the '.json' extension) to the directory,
# relative to './data/calm/', that contains it. Every key ends in a
# three-character language suffix ('_CN' or '_EN'); the loop below relies on
# that suffix both to derive the language-independent task name and to pick
# the data file matching the prompt style.
task_hiearchy_dict = {
    # association/
    #   correlation/
    'CORR-B_correlation_CN': 'association/correlation/',
    'CORR-B_correlation_EN': 'association/correlation/',
    #   explaining_away_effect/
    'EAE-B_exp-away_CN': 'association/explaining_away_effect/',
    'EAE-B_exp-away_EN': 'association/explaining_away_effect/',
    # causal_discovery/
    #   abstract_reasoning/
    'AR-B_CaLM-AR_CN': 'causal_discovery/abstract_reasoning/',
    'AR-B_CaLM-AR_EN': 'causal_discovery/abstract_reasoning/',
    #   causal_attribution/
    'CA-B_FA_CN': 'causal_discovery/causal_attribution/',
    'CA-B_FA_EN': 'causal_discovery/causal_attribution/',
    'CA-B_FP_CN': 'causal_discovery/causal_attribution/',
    'CA-B_FP_EN': 'causal_discovery/causal_attribution/',
    #   event_causality_identification/
    'ECI-B_CTB_CN': 'causal_discovery/event_causality_identification/',
    'ECI-B_CTB_EN': 'causal_discovery/event_causality_identification/',
    'ECI-B_ESC_CN': 'causal_discovery/event_causality_identification/',
    'ECI-B_ESC_EN': 'causal_discovery/event_causality_identification/',
    'ECI-B_MAVEN-ERE_CN': 'causal_discovery/event_causality_identification/',
    'ECI-B_MAVEN-ERE_EN': 'causal_discovery/event_causality_identification/',
    #   pairwise_causal_discovery/
    'PCD-B_COPA_CN': 'causal_discovery/pairwise_causal_discovery/',
    'PCD-B_COPA_EN': 'causal_discovery/pairwise_causal_discovery/',
    'PCD-B_E-CARE_CN': 'causal_discovery/pairwise_causal_discovery/',
    'PCD-B_E-CARE_EN': 'causal_discovery/pairwise_causal_discovery/',
    'PCD-C_COPA_CN': 'causal_discovery/pairwise_causal_discovery/',
    'PCD-C_COPA_EN': 'causal_discovery/pairwise_causal_discovery/',
    'PCD-C_E-CARE_CN': 'causal_discovery/pairwise_causal_discovery/',
    'PCD-C_E-CARE_EN': 'causal_discovery/pairwise_causal_discovery/',
    # counterfactual/
    #   actual_causality/
    'AC-B_causal_judgement_CN': 'counterfactual/actual_causality/',
    'AC-B_causal_judgement_EN': 'counterfactual/actual_causality/',
    #   causal_explanation_generation/
    'CEG-O_E-CARE_CN': 'counterfactual/causal_explanation_generation/',
    'CEG-O_E-CARE_EN': 'counterfactual/causal_explanation_generation/',
    #   counterfactual_reasoning/
    'CR-B_det-counterfactual_CN': 'counterfactual/counterfactual_reasoning/',
    'CR-B_det-counterfactual_EN': 'counterfactual/counterfactual_reasoning/',
    'CR-C_CRASS_CN': 'counterfactual/counterfactual_reasoning/',
    'CR-C_CRASS_EN': 'counterfactual/counterfactual_reasoning/',
    #   effect_of_the_treatment_on_the_treated/
    'ETT-B_ETT-natural_CN': 'counterfactual/effect_of_the_treatment_on_the_treated/',
    'ETT-B_ETT-natural_EN': 'counterfactual/effect_of_the_treatment_on_the_treated/',
    'ETT-P_ETT-basic_CN': 'counterfactual/effect_of_the_treatment_on_the_treated/',
    'ETT-P_ETT-basic_EN': 'counterfactual/effect_of_the_treatment_on_the_treated/',
    'ETT-P_ETT-hard_CN': 'counterfactual/effect_of_the_treatment_on_the_treated/',
    'ETT-P_ETT-hard_EN': 'counterfactual/effect_of_the_treatment_on_the_treated/',
    #   natural_direct_effect/
    'NDE-B_NDE-natural_CN': 'counterfactual/natural_direct_effect/',
    'NDE-B_NDE-natural_EN': 'counterfactual/natural_direct_effect/',
    'NDE-P_NDE-basic_CN': 'counterfactual/natural_direct_effect/',
    'NDE-P_NDE-basic_EN': 'counterfactual/natural_direct_effect/',
    'NDE-P_NDE-hard_CN': 'counterfactual/natural_direct_effect/',
    'NDE-P_NDE-hard_EN': 'counterfactual/natural_direct_effect/',
    #   natural_indirect_effect/
    'NIE-B_NIE-natural_CN': 'counterfactual/natural_indirect_effect/',
    'NIE-B_NIE-natural_EN': 'counterfactual/natural_indirect_effect/',
    'NIE-P_NIE-basic_CN': 'counterfactual/natural_indirect_effect/',
    'NIE-P_NIE-basic_EN': 'counterfactual/natural_indirect_effect/',
    'NIE-P_NIE-hard_CN': 'counterfactual/natural_indirect_effect/',
    'NIE-P_NIE-hard_EN': 'counterfactual/natural_indirect_effect/',
    #   probability_of_necessity/
    'PN-P_PN-basic_CN': 'counterfactual/probability_of_necessity/',
    'PN-P_PN-basic_EN': 'counterfactual/probability_of_necessity/',
    'PN-P_PN-hard_CN': 'counterfactual/probability_of_necessity/',
    'PN-P_PN-hard_EN': 'counterfactual/probability_of_necessity/',
    #   probability_of_sufficiency/
    'PS-P_PS-basic_CN': 'counterfactual/probability_of_sufficiency/',
    'PS-P_PS-basic_EN': 'counterfactual/probability_of_sufficiency/',
    'PS-P_PS-hard_CN': 'counterfactual/probability_of_sufficiency/',
    'PS-P_PS-hard_EN': 'counterfactual/probability_of_sufficiency/',
    # intervention/
    #   average_treatment_effect/
    'ATE-B_ATE-natural_CN': 'intervention/average_treatment_effect/',
    'ATE-B_ATE-natural_EN': 'intervention/average_treatment_effect/',
    'ATE-P_ATE-basic_CN': 'intervention/average_treatment_effect/',
    'ATE-P_ATE-basic_EN': 'intervention/average_treatment_effect/',
    'ATE-P_ATE-hard_CN': 'intervention/average_treatment_effect/',
    'ATE-P_ATE-hard_EN': 'intervention/average_treatment_effect/',
    #   backdoor_adjustment_set/
    'BAS-B_backadj_CN': 'intervention/backdoor_adjustment_set/',
    'BAS-B_backadj_EN': 'intervention/backdoor_adjustment_set/',
    'BAS-C_max-BAS_CN': 'intervention/backdoor_adjustment_set/',
    'BAS-C_max-BAS_EN': 'intervention/backdoor_adjustment_set/',
    'BAS-C_min-BAS_CN': 'intervention/backdoor_adjustment_set/',
    'BAS-C_min-BAS_EN': 'intervention/backdoor_adjustment_set/',
    'BAS-C_mix-BAS_CN': 'intervention/backdoor_adjustment_set/',
    'BAS-C_mix-BAS_EN': 'intervention/backdoor_adjustment_set/',
    #   causal_effect_identification/
    'CEI-B_0.2-UC_CN': 'intervention/causal_effect_identification/',
    'CEI-B_0.2-UC_EN': 'intervention/causal_effect_identification/',
    'CEI-B_0.4-UC_CN': 'intervention/causal_effect_identification/',
    'CEI-B_0.4-UC_EN': 'intervention/causal_effect_identification/',
    'CEI-B_0.6-UC_CN': 'intervention/causal_effect_identification/',
    'CEI-B_0.6-UC_EN': 'intervention/causal_effect_identification/',
    'CEI-B_0.8-UC_CN': 'intervention/causal_effect_identification/',
    'CEI-B_0.8-UC_EN': 'intervention/causal_effect_identification/',
    #   collider_bias/
    'CB-B_collider-bias_CN': 'intervention/collider_bias/',
    'CB-B_collider-bias_EN': 'intervention/collider_bias/',
    #   controlled_direct_effect/
    'CDE-B_CDE-natural_CN': 'intervention/controlled_direct_effect/',
    'CDE-B_CDE-natural_EN': 'intervention/controlled_direct_effect/',
    'CDE-P_CDE-basic_CN': 'intervention/controlled_direct_effect/',
    'CDE-P_CDE-basic_EN': 'intervention/controlled_direct_effect/',
    'CDE-P_CDE-hard_CN': 'intervention/controlled_direct_effect/',
    'CDE-P_CDE-hard_EN': 'intervention/controlled_direct_effect/',
    #   frontdoor_adjustment_set/
    'FAS-C_FAS_CN': 'intervention/frontdoor_adjustment_set/',
    'FAS-C_FAS_EN': 'intervention/frontdoor_adjustment_set/',
    #   instrumental_variable/
    'IV-C_CaLM-IV_CN': 'intervention/instrumental_variable/',
    'IV-C_CaLM-IV_EN': 'intervention/instrumental_variable/',
}

# Reader settings shared by every CaLM dataset entry.
calm_reader_cfg = dict(
    input_columns=['question'],
    output_column='gt_item')

# Language-independent task names: each key with its '_CN'/'_EN' suffix
# removed, de-duplicated. Sorted so that the order of ``calm_datasets`` is
# identical on every run; iterating a plain set of strings would make the
# order depend on per-process string hash randomization.
calm_all_sets = sorted({key[:-3] for key in task_hiearchy_dict})

calm_datasets = []
for _name in calm_all_sets:
    for _prompt_style in ['basic', 'basic-CN']:
        # A '-CN' prompt style is paired with the '_CN' data file; any other
        # style is paired with the '_EN' data file.
        _task_name = _name + ('_CN' if _prompt_style.endswith('-CN') else '_EN')
        _path = f'./data/calm/{task_hiearchy_dict[_task_name]}{_task_name}.json'

        # Built inside the loop so that every dataset entry receives its own
        # dict objects rather than sharing one mutable instance.
        calm_infer_cfg = dict(
            prompt_template=dict(
                type=PromptTemplate,
                template='{question}'),
            retriever=dict(type=ZeroRetriever),
            inferencer=dict(type=GenInferencer, max_out_len=500))

        calm_eval_cfg = dict(evaluator=dict(
            type=CaLMEvaluator,
            core_metrics=True,
            error_analysis=True,
            prompt_style=_prompt_style,
            task=_task_name))

        calm_datasets.append(
            dict(
                abbr=f'calm_{_task_name}',
                type=CaLMDataset,
                path=_path,
                prompt_style=_prompt_style,
                reader_cfg=calm_reader_cfg,
                infer_cfg=calm_infer_cfg,
                eval_cfg=calm_eval_cfg)
        )

# Remove the loop temporaries from the module namespace once the list is built.
del _prompt_style, _task_name, _path, _name