From 9fa80b0f936577e70c1b6861dcc161a37f64787e Mon Sep 17 00:00:00 2001 From: Fengzhe Zhou Date: Mon, 27 May 2024 16:17:01 +0800 Subject: [PATCH] [Feat] Update charm summary (#1194) --- configs/_internal | 1 - .../datasets/CHARM/charm_rea_gen_f8fca2.py | 40 +------- configs/eval_charm.py | 20 ++++ configs/summarizers/charm_rea.py | 98 +++++++++++++++++++ configs/summarizers/groups/charm_rea.py | 30 ++++++ 5 files changed, 153 insertions(+), 36 deletions(-) delete mode 120000 configs/_internal create mode 100644 configs/summarizers/charm_rea.py create mode 100644 configs/summarizers/groups/charm_rea.py diff --git a/configs/_internal b/configs/_internal deleted file mode 120000 index 92fa70e5..00000000 --- a/configs/_internal +++ /dev/null @@ -1 +0,0 @@ -/cpfs01/user/zhoufengzhe/fattn2/opencompass/configs/internal/ \ No newline at end of file diff --git a/configs/datasets/CHARM/charm_rea_gen_f8fca2.py b/configs/datasets/CHARM/charm_rea_gen_f8fca2.py index 70f97304..f9af1c6e 100644 --- a/configs/datasets/CHARM/charm_rea_gen_f8fca2.py +++ b/configs/datasets/CHARM/charm_rea_gen_f8fca2.py @@ -32,41 +32,11 @@ Translate_EN_template = 'Follow the given examples and answer the question.\n{_h Other_template = '请按照给定的例子回答问题。\n{_hint}\n\nQ:{{input}}\nA:' settings = [ - ( - 'Direct', - '', - dataset_path_ZH, - fewshot_example_path_ZH, - Other_template, - ), - ( - 'ZH-CoT', - '让我们一步一步来思考。', - dataset_path_ZH, - fewshot_example_path_ZH, - Other_template, - ), - ( - 'EN-CoT', - "Let's think step by step.", - dataset_path_ZH, - fewshot_example_path_ZH, - Other_template, - ), - ( - 'XLT', - """You should retell the request in English.\nYou should do the answer step by step to choose the right answer.\nYou should step-by-step answer the request.\nYou should tell me the answer in this format 'So the answer is'.""", - dataset_path_ZH, - fewshot_example_path_ZH, - XLT_template, - ), - ( - 'Translate-EN', - "Let's think step by step.", - dataset_path_TransEn, - fewshot_example_path_TransEn, - Translate_EN_template, - ), + ('Direct', '', dataset_path_ZH, fewshot_example_path_ZH, Other_template), + ('ZH-CoT', '让我们一步一步来思考。', dataset_path_ZH, fewshot_example_path_ZH, Other_template), + ('EN-CoT', "Let's think step by step.", dataset_path_ZH, fewshot_example_path_ZH, Other_template), + ('XLT', """You should retell the request in English.\nYou should do the answer step by step to choose the right answer.\nYou should step-by-step answer the request.\nYou should tell me the answer in this format 'So the answer is'.""", dataset_path_ZH, fewshot_example_path_ZH, XLT_template), + ('Translate-EN', "Let's think step by step.", dataset_path_TransEn, fewshot_example_path_TransEn, Translate_EN_template), ] charm_rea_datasets = [] diff --git a/configs/eval_charm.py b/configs/eval_charm.py index 9a128490..fb6f1700 100644 --- a/configs/eval_charm.py +++ b/configs/eval_charm.py @@ -27,6 +27,26 @@ with read_base(): # from .models.hf_llama.hf_llama3_8b_instruct import models as llama3_8b_instruct_model # from .models.hf_llama.hf_llama3_70b_instruct import models as llama3_70b_instruct_model + from .summarizers.charm_rea import summarizer models = sum([v for k, v in locals().items() if k.endswith('_model')], []) work_dir = './outputs/CHARM/chat/' + +# dataset version metric mode internlm2-chat-7b-turbomind +# ------------------------------------------------------------- --------- ------------- ------ ----------------------------- +# charm-rea-Direct - naive_average gen 49.51 +# charm-rea-ZH-CoT - naive_average gen 61.33 +# charm-rea-EN-CoT - naive_average gen 54.55 +# charm-rea-XLT - naive_average gen 58.46 +# charm-rea-Translate-EN - naive_average gen 56.15 +# - - - - +# charm-rea-Chinese_Direct - naive_average gen 47.14 +# charm-rea-Chinese_ZH-CoT - naive_average gen 58.40 +# charm-rea-Chinese_EN-CoT - naive_average gen 48.31 +# charm-rea-Chinese_XLT - naive_average gen 53.57 +# charm-rea-Chinese_Translate-EN - naive_average gen 48.21 +# charm-rea-Global_Direct - naive_average gen 51.88 +# charm-rea-Global_ZH-CoT - naive_average gen 64.26 +# charm-rea-Global_EN-CoT - naive_average gen 60.79 +# charm-rea-Global_XLT - naive_average gen 63.36 +# charm-rea-Global_Translate-EN - naive_average gen 64.10 diff --git a/configs/summarizers/charm_rea.py b/configs/summarizers/charm_rea.py new file mode 100644 index 00000000..953b2453 --- /dev/null +++ b/configs/summarizers/charm_rea.py @@ -0,0 +1,98 @@ +from mmengine.config import read_base + +with read_base(): + from .groups.charm_rea import charm_rea_summary_groups + +summarizer = dict( + dataset_abbrs=[ + 'charm-rea-Direct', + 'charm-rea-ZH-CoT', + 'charm-rea-EN-CoT', + 'charm-rea-XLT', + 'charm-rea-Translate-EN', + '', + 'charm-rea-Chinese_Direct', + 'charm-rea-Chinese_ZH-CoT', + 'charm-rea-Chinese_EN-CoT', + 'charm-rea-Chinese_XLT', + 'charm-rea-Chinese_Translate-EN', + 'charm-rea-Global_Direct', + 'charm-rea-Global_ZH-CoT', + 'charm-rea-Global_EN-CoT', + 'charm-rea-Global_XLT', + 'charm-rea-Global_Translate-EN', + '', + 'charm-rea-Chinese_Anachronisms_Judgment_Direct', + 'charm-rea-Chinese_Movie_and_Music_Recommendation_Direct', + 'charm-rea-Chinese_Natural_Language_Inference_Direct', + 'charm-rea-Chinese_Reading_Comprehension_Direct', + 'charm-rea-Chinese_Sequence_Understanding_Direct', + 'charm-rea-Chinese_Sport_Understanding_Direct', + 'charm-rea-Chinese_Time_Understanding_Direct', + 'charm-rea-Global_Anachronisms_Judgment_Direct', + 'charm-rea-Global_Movie_and_Music_Recommendation_Direct', + 'charm-rea-Global_Natural_Language_Inference_Direct', + 'charm-rea-Global_Reading_Comprehension_Direct', + 'charm-rea-Global_Sequence_Understanding_Direct', + 'charm-rea-Global_Sport_Understanding_Direct', + 'charm-rea-Global_Time_Understanding_Direct', + 'charm-rea-Chinese_Anachronisms_Judgment_ZH-CoT', + 'charm-rea-Chinese_Movie_and_Music_Recommendation_ZH-CoT', + 'charm-rea-Chinese_Natural_Language_Inference_ZH-CoT', + 'charm-rea-Chinese_Reading_Comprehension_ZH-CoT', + 'charm-rea-Chinese_Sequence_Understanding_ZH-CoT', + 'charm-rea-Chinese_Sport_Understanding_ZH-CoT', + 'charm-rea-Chinese_Time_Understanding_ZH-CoT', + 'charm-rea-Global_Anachronisms_Judgment_ZH-CoT', + 'charm-rea-Global_Movie_and_Music_Recommendation_ZH-CoT', + 'charm-rea-Global_Natural_Language_Inference_ZH-CoT', + 'charm-rea-Global_Reading_Comprehension_ZH-CoT', + 'charm-rea-Global_Sequence_Understanding_ZH-CoT', + 'charm-rea-Global_Sport_Understanding_ZH-CoT', + 'charm-rea-Global_Time_Understanding_ZH-CoT', + 'charm-rea-Chinese_Anachronisms_Judgment_EN-CoT', + 'charm-rea-Chinese_Movie_and_Music_Recommendation_EN-CoT', + 'charm-rea-Chinese_Natural_Language_Inference_EN-CoT', + 'charm-rea-Chinese_Reading_Comprehension_EN-CoT', + 'charm-rea-Chinese_Sequence_Understanding_EN-CoT', + 'charm-rea-Chinese_Sport_Understanding_EN-CoT', + 'charm-rea-Chinese_Time_Understanding_EN-CoT', + 'charm-rea-Global_Anachronisms_Judgment_EN-CoT', + 'charm-rea-Global_Movie_and_Music_Recommendation_EN-CoT', + 'charm-rea-Global_Natural_Language_Inference_EN-CoT', + 'charm-rea-Global_Reading_Comprehension_EN-CoT', + 'charm-rea-Global_Sequence_Understanding_EN-CoT', + 'charm-rea-Global_Sport_Understanding_EN-CoT', + 'charm-rea-Global_Time_Understanding_EN-CoT', + 'charm-rea-Chinese_Anachronisms_Judgment_XLT', + 'charm-rea-Chinese_Movie_and_Music_Recommendation_XLT', + 'charm-rea-Chinese_Natural_Language_Inference_XLT', + 'charm-rea-Chinese_Reading_Comprehension_XLT', + 'charm-rea-Chinese_Sequence_Understanding_XLT', + 'charm-rea-Chinese_Sport_Understanding_XLT', + 'charm-rea-Chinese_Time_Understanding_XLT', + 'charm-rea-Global_Anachronisms_Judgment_XLT', + 'charm-rea-Global_Movie_and_Music_Recommendation_XLT', + 'charm-rea-Global_Natural_Language_Inference_XLT', + 'charm-rea-Global_Reading_Comprehension_XLT', + 'charm-rea-Global_Sequence_Understanding_XLT', + 'charm-rea-Global_Sport_Understanding_XLT', + 'charm-rea-Global_Time_Understanding_XLT', + 'charm-rea-Chinese_Anachronisms_Judgment_Translate-EN', + 'charm-rea-Chinese_Movie_and_Music_Recommendation_Translate-EN', + 'charm-rea-Chinese_Natural_Language_Inference_Translate-EN', + 'charm-rea-Chinese_Reading_Comprehension_Translate-EN', + 'charm-rea-Chinese_Sequence_Understanding_Translate-EN', + 'charm-rea-Chinese_Sport_Understanding_Translate-EN', + 'charm-rea-Chinese_Time_Understanding_Translate-EN', + 'charm-rea-Global_Anachronisms_Judgment_Translate-EN', + 'charm-rea-Global_Movie_and_Music_Recommendation_Translate-EN', + 'charm-rea-Global_Natural_Language_Inference_Translate-EN', + 'charm-rea-Global_Reading_Comprehension_Translate-EN', + 'charm-rea-Global_Sequence_Understanding_Translate-EN', + 'charm-rea-Global_Sport_Understanding_Translate-EN', + 'charm-rea-Global_Time_Understanding_Translate-EN', + ], + summary_groups=sum( + [v for k, v in locals().items() if k.endswith('_summary_groups')], []) +) diff --git a/configs/summarizers/groups/charm_rea.py b/configs/summarizers/groups/charm_rea.py new file mode 100644 index 00000000..ade93275 --- /dev/null +++ b/configs/summarizers/groups/charm_rea.py @@ -0,0 +1,30 @@ +charm_tasks = [ + 'Anachronisms_Judgment', + 'Movie_and_Music_Recommendation', + 'Natural_Language_Inference', + 'Reading_Comprehension', + 'Sequence_Understanding', + 'Sport_Understanding', + 'Time_Understanding', +] +regions = [ + 'Chinese', + 'Global', +] +prompts = [ + 'Direct', + 'ZH-CoT', + 'EN-CoT', + 'XLT', + 'Translate-EN', +] + + +charm_rea_summary_groups = [] +for prompt in prompts: + for region in regions: + subsets = ['charm-rea-' + region + '_' + task + '_' + prompt for task in charm_tasks] + charm_rea_summary_groups.append({'name': 'charm-rea-' + region + '_' + prompt, 'subsets': subsets}) +for prompt in prompts: + subsets = ['charm-rea-' + region + '_' + prompt for region in regions] + charm_rea_summary_groups.append({'name': 'charm-rea-' + prompt, 'subsets': subsets})