[Feat] Update charm summary (#1194)

Fengzhe Zhou 2024-05-27 16:17:01 +08:00 committed by GitHub
parent 608ff5810d
commit 9fa80b0f93
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 153 additions and 36 deletions

View File

@@ -1 +0,0 @@
-/cpfs01/user/zhoufengzhe/fattn2/opencompass/configs/internal/

View File

@@ -32,41 +32,11 @@ Translate_EN_template = 'Follow the given examples and answer the question.\n{_h
 Other_template = '请按照给定的例子回答问题。\n{_hint}\n\nQ{{input}}\nA'
 settings = [
-    (
-        'Direct',
-        '',
-        dataset_path_ZH,
-        fewshot_example_path_ZH,
-        Other_template,
-    ),
-    (
-        'ZH-CoT',
-        '让我们一步一步来思考。',
-        dataset_path_ZH,
-        fewshot_example_path_ZH,
-        Other_template,
-    ),
-    (
-        'EN-CoT',
-        "Let's think step by step.",
-        dataset_path_ZH,
-        fewshot_example_path_ZH,
-        Other_template,
-    ),
-    (
-        'XLT',
-        """You should retell the request in English.\nYou should do the answer step by step to choose the right answer.\nYou should step-by-step answer the request.\nYou should tell me the answer in this format 'So the answer is'.""",
-        dataset_path_ZH,
-        fewshot_example_path_ZH,
-        XLT_template,
-    ),
-    (
-        'Translate-EN',
-        "Let's think step by step.",
-        dataset_path_TransEn,
-        fewshot_example_path_TransEn,
-        Translate_EN_template,
-    ),
+    ('Direct', '', dataset_path_ZH, fewshot_example_path_ZH, Other_template),
+    ('ZH-CoT', '让我们一步一步来思考。', dataset_path_ZH, fewshot_example_path_ZH, Other_template),
+    ('EN-CoT', "Let's think step by step.", dataset_path_ZH, fewshot_example_path_ZH, Other_template),
+    ('XLT', """You should retell the request in English.\nYou should do the answer step by step to choose the right answer.\nYou should step-by-step answer the request.\nYou should tell me the answer in this format 'So the answer is'.""", dataset_path_ZH, fewshot_example_path_ZH, XLT_template),
+    ('Translate-EN', "Let's think step by step.", dataset_path_TransEn, fewshot_example_path_TransEn, Translate_EN_template),
 ]
 charm_rea_datasets = []
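For readers skimming the diff: each 5-tuple in settings holds the prompting-approach name, the chain-of-thought instruction (empty for Direct), the dataset path, the few-shot example path, and the prompt template. The sketch below is purely illustrative of how such tuples are unpacked; the paths, dict keys, and abbr scheme are assumptions, not the actual charm_rea config code.

# Illustrative only: field order of the 5-tuples in `settings` and a typical unpacking loop.
# Paths and dict keys below are made up for the example.
settings = [
    ('Direct', '', 'data/CHARM/reasoning_ZH', 'few_shot/ZH', '{_hint}\n\nQ{input}\nA'),
    ('ZH-CoT', '让我们一步一步来思考。', 'data/CHARM/reasoning_ZH', 'few_shot/ZH', '{_hint}\n\nQ{input}\nA'),
]

charm_rea_datasets = []
for name, cot_hint, dataset_path, fewshot_path, template in settings:
    charm_rea_datasets.append({
        'abbr': f'charm-rea-{name}',  # illustrative abbr; the real config also encodes region and task
        'path': dataset_path,
        'fewshot_path': fewshot_path,
        'hint': cot_hint,
        'template': template,
    })

print([d['abbr'] for d in charm_rea_datasets])
# ['charm-rea-Direct', 'charm-rea-ZH-CoT']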

View File

@@ -27,6 +27,26 @@ with read_base():
     # from .models.hf_llama.hf_llama3_8b_instruct import models as llama3_8b_instruct_model
     # from .models.hf_llama.hf_llama3_70b_instruct import models as llama3_70b_instruct_model
+    from .summarizers.charm_rea import summarizer
 
 models = sum([v for k, v in locals().items() if k.endswith('_model')], [])
 
 work_dir = './outputs/CHARM/chat/'
+
+# dataset                          version    metric         mode    internlm2-chat-7b-turbomind
+# -------------------------------  ---------  -------------  ------  -----------------------------
+# charm-rea-Direct                 -          naive_average  gen     49.51
+# charm-rea-ZH-CoT                 -          naive_average  gen     61.33
+# charm-rea-EN-CoT                 -          naive_average  gen     54.55
+# charm-rea-XLT                    -          naive_average  gen     58.46
+# charm-rea-Translate-EN           -          naive_average  gen     56.15
+# -                                -          -              -
+# charm-rea-Chinese_Direct         -          naive_average  gen     47.14
+# charm-rea-Chinese_ZH-CoT         -          naive_average  gen     58.40
+# charm-rea-Chinese_EN-CoT         -          naive_average  gen     48.31
+# charm-rea-Chinese_XLT            -          naive_average  gen     53.57
+# charm-rea-Chinese_Translate-EN   -          naive_average  gen     48.21
+# charm-rea-Global_Direct          -          naive_average  gen     51.88
+# charm-rea-Global_ZH-CoT          -          naive_average  gen     64.26
+# charm-rea-Global_EN-CoT          -          naive_average  gen     60.79
+# charm-rea-Global_XLT             -          naive_average  gen     63.36
+# charm-rea-Global_Translate-EN    -          naive_average  gen     64.10
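One way to read the new result block: each top-level charm-rea-<prompt> row is the plain mean of its Chinese_ and Global_ rows, which is what naive_average over the two-subset groups defined in groups/charm_rea.py produces. A quick check using only the numbers quoted in the comment above:

# Recomputing the top-level rows from the per-region rows quoted above.
region_scores = {
    'Direct':       (47.14, 51.88),
    'ZH-CoT':       (58.40, 64.26),
    'EN-CoT':       (48.31, 60.79),
    'XLT':          (53.57, 63.36),
    'Translate-EN': (48.21, 64.10),
}
for prompt, (chinese, global_) in region_scores.items():
    print(f'charm-rea-{prompt}: {(chinese + global_) / 2:.2f}')
# Matches the reported 49.51, 61.33, 54.55, 58.46 and 56.15 (up to rounding of the last digit).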

View File

@@ -0,0 +1,98 @@
from mmengine.config import read_base
with read_base():
from .groups.charm_rea import charm_rea_summary_groups
summarizer = dict(
dataset_abbrs=[
'charm-rea-Direct',
'charm-rea-ZH-CoT',
'charm-rea-EN-CoT',
'charm-rea-XLT',
'charm-rea-Translate-EN',
'',
'charm-rea-Chinese_Direct',
'charm-rea-Chinese_ZH-CoT',
'charm-rea-Chinese_EN-CoT',
'charm-rea-Chinese_XLT',
'charm-rea-Chinese_Translate-EN',
'charm-rea-Global_Direct',
'charm-rea-Global_ZH-CoT',
'charm-rea-Global_EN-CoT',
'charm-rea-Global_XLT',
'charm-rea-Global_Translate-EN',
'',
'charm-rea-Chinese_Anachronisms_Judgment_Direct',
'charm-rea-Chinese_Movie_and_Music_Recommendation_Direct',
'charm-rea-Chinese_Natural_Language_Inference_Direct',
'charm-rea-Chinese_Reading_Comprehension_Direct',
'charm-rea-Chinese_Sequence_Understanding_Direct',
'charm-rea-Chinese_Sport_Understanding_Direct',
'charm-rea-Chinese_Time_Understanding_Direct',
'charm-rea-Global_Anachronisms_Judgment_Direct',
'charm-rea-Global_Movie_and_Music_Recommendation_Direct',
'charm-rea-Global_Natural_Language_Inference_Direct',
'charm-rea-Global_Reading_Comprehension_Direct',
'charm-rea-Global_Sequence_Understanding_Direct',
'charm-rea-Global_Sport_Understanding_Direct',
'charm-rea-Global_Time_Understanding_Direct',
'charm-rea-Chinese_Anachronisms_Judgment_ZH-CoT',
'charm-rea-Chinese_Movie_and_Music_Recommendation_ZH-CoT',
'charm-rea-Chinese_Natural_Language_Inference_ZH-CoT',
'charm-rea-Chinese_Reading_Comprehension_ZH-CoT',
'charm-rea-Chinese_Sequence_Understanding_ZH-CoT',
'charm-rea-Chinese_Sport_Understanding_ZH-CoT',
'charm-rea-Chinese_Time_Understanding_ZH-CoT',
'charm-rea-Global_Anachronisms_Judgment_ZH-CoT',
'charm-rea-Global_Movie_and_Music_Recommendation_ZH-CoT',
'charm-rea-Global_Natural_Language_Inference_ZH-CoT',
'charm-rea-Global_Reading_Comprehension_ZH-CoT',
'charm-rea-Global_Sequence_Understanding_ZH-CoT',
'charm-rea-Global_Sport_Understanding_ZH-CoT',
'charm-rea-Global_Time_Understanding_ZH-CoT',
'charm-rea-Chinese_Anachronisms_Judgment_EN-CoT',
'charm-rea-Chinese_Movie_and_Music_Recommendation_EN-CoT',
'charm-rea-Chinese_Natural_Language_Inference_EN-CoT',
'charm-rea-Chinese_Reading_Comprehension_EN-CoT',
'charm-rea-Chinese_Sequence_Understanding_EN-CoT',
'charm-rea-Chinese_Sport_Understanding_EN-CoT',
'charm-rea-Chinese_Time_Understanding_EN-CoT',
'charm-rea-Global_Anachronisms_Judgment_EN-CoT',
'charm-rea-Global_Movie_and_Music_Recommendation_EN-CoT',
'charm-rea-Global_Natural_Language_Inference_EN-CoT',
'charm-rea-Global_Reading_Comprehension_EN-CoT',
'charm-rea-Global_Sequence_Understanding_EN-CoT',
'charm-rea-Global_Sport_Understanding_EN-CoT',
'charm-rea-Global_Time_Understanding_EN-CoT',
'charm-rea-Chinese_Anachronisms_Judgment_XLT',
'charm-rea-Chinese_Movie_and_Music_Recommendation_XLT',
'charm-rea-Chinese_Natural_Language_Inference_XLT',
'charm-rea-Chinese_Reading_Comprehension_XLT',
'charm-rea-Chinese_Sequence_Understanding_XLT',
'charm-rea-Chinese_Sport_Understanding_XLT',
'charm-rea-Chinese_Time_Understanding_XLT',
'charm-rea-Global_Anachronisms_Judgment_XLT',
'charm-rea-Global_Movie_and_Music_Recommendation_XLT',
'charm-rea-Global_Natural_Language_Inference_XLT',
'charm-rea-Global_Reading_Comprehension_XLT',
'charm-rea-Global_Sequence_Understanding_XLT',
'charm-rea-Global_Sport_Understanding_XLT',
'charm-rea-Global_Time_Understanding_XLT',
'charm-rea-Chinese_Anachronisms_Judgment_Translate-EN',
'charm-rea-Chinese_Movie_and_Music_Recommendation_Translate-EN',
'charm-rea-Chinese_Natural_Language_Inference_Translate-EN',
'charm-rea-Chinese_Reading_Comprehension_Translate-EN',
'charm-rea-Chinese_Sequence_Understanding_Translate-EN',
'charm-rea-Chinese_Sport_Understanding_Translate-EN',
'charm-rea-Chinese_Time_Understanding_Translate-EN',
'charm-rea-Global_Anachronisms_Judgment_Translate-EN',
'charm-rea-Global_Movie_and_Music_Recommendation_Translate-EN',
'charm-rea-Global_Natural_Language_Inference_Translate-EN',
'charm-rea-Global_Reading_Comprehension_Translate-EN',
'charm-rea-Global_Sequence_Understanding_Translate-EN',
'charm-rea-Global_Sport_Understanding_Translate-EN',
'charm-rea-Global_Time_Understanding_Translate-EN',
],
summary_groups=sum(
[v for k, v in locals().items() if k.endswith('_summary_groups')], [])
)
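The summary_groups line relies on the usual OpenCompass pattern: read_base() injects every *_summary_groups list into this module's namespace, and the locals() scan concatenates them. A standalone sketch of that aggregation follows; the second group list is hypothetical and exists only to show the flattening.

# Minimal sketch of the locals()-based aggregation used above: every module-level
# variable whose name ends in '_summary_groups' is collected, and the lists are
# concatenated into one flat list with sum(..., []).
charm_rea_summary_groups = [
    {'name': 'charm-rea-ZH-CoT',
     'subsets': ['charm-rea-Chinese_ZH-CoT', 'charm-rea-Global_ZH-CoT']},
]
extra_summary_groups = [  # hypothetical second list, e.g. pulled in via another read_base() import
    {'name': 'some-other-group', 'subsets': ['a', 'b']},
]

summary_groups = sum(
    [v for k, v in locals().items() if k.endswith('_summary_groups')], [])
print([g['name'] for g in summary_groups])
# ['charm-rea-ZH-CoT', 'some-other-group'] -- both lists merged, in definition order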

View File

@@ -0,0 +1,30 @@
charm_tasks = [
'Anachronisms_Judgment',
'Movie_and_Music_Recommendation',
'Natural_Language_Inference',
'Reading_Comprehension',
'Sequence_Understanding',
'Sport_Understanding',
'Time_Understanding',
]
regions = [
'Chinese',
'Global',
]
prompts = [
'Direct',
'ZH-CoT',
'EN-CoT',
'XLT',
'Translate-EN',
]
charm_rea_summary_groups = []
for prompt in prompts:
for region in regions:
subsets = ['charm-rea-' + region + '_' + task + '_' + prompt for task in charm_tasks]
charm_rea_summary_groups.append({'name': 'charm-rea-' + region + '_' + prompt, 'subsets': subsets})
for prompt in prompts:
subsets = ['charm-rea-' + region + '_' + prompt for region in regions]
charm_rea_summary_groups.append({'name': 'charm-rea-' + prompt, 'subsets': subsets})
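Run after the definitions above (it assumes charm_rea_summary_groups is in scope), a quick check of what the two loops generate: 10 region-level groups (5 prompts x 2 regions, 7 task subsets each) followed by 5 prompt-level groups (2 region subsets each).

# Assumes the charm_rea_summary_groups built above is in scope.
assert len(charm_rea_summary_groups) == 15    # 5 * 2 region-level groups + 5 prompt-level groups
print(charm_rea_summary_groups[0]['name'])    # charm-rea-Chinese_Direct (7 task subsets)
print(charm_rea_summary_groups[-1]['name'])   # charm-rea-Translate-EN
print(charm_rea_summary_groups[-1]['subsets'])
# ['charm-rea-Chinese_Translate-EN', 'charm-rea-Global_Translate-EN']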