mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Feat] Update charm summary (#1194)
This commit is contained in:
parent
608ff5810d
commit
9fa80b0f93
@ -1 +0,0 @@
|
|||||||
/cpfs01/user/zhoufengzhe/fattn2/opencompass/configs/internal/
|
|
@ -32,41 +32,11 @@ Translate_EN_template = 'Follow the given examples and answer the question.\n{_h
|
|||||||
Other_template = '请按照给定的例子回答问题。\n{_hint}\n\nQ:{{input}}\nA:'
|
Other_template = '请按照给定的例子回答问题。\n{_hint}\n\nQ:{{input}}\nA:'
|
||||||
|
|
||||||
# Prompt settings evaluated for CHARM reasoning. Each entry is a 5-tuple:
#   (setting name, hint text, dataset path, few-shot example path, prompt template)
# NOTE(review): the hint text presumably fills the `{_hint}` placeholder of the
# chosen template -- confirm against the code that consumes `settings`.
settings = [
    ('Direct', '', dataset_path_ZH, fewshot_example_path_ZH, Other_template),
    ('ZH-CoT', '让我们一步一步来思考。', dataset_path_ZH, fewshot_example_path_ZH, Other_template),
    ('EN-CoT', "Let's think step by step.", dataset_path_ZH, fewshot_example_path_ZH, Other_template),
    (
        'XLT',
        """You should retell the request in English.\nYou should do the answer step by step to choose the right answer.\nYou should step-by-step answer the request.\nYou should tell me the answer in this format 'So the answer is'.""",
        dataset_path_ZH,
        fewshot_example_path_ZH,
        XLT_template,
    ),
    # The only setting that reads the translated-to-English dataset and examples.
    ('Translate-EN', "Let's think step by step.", dataset_path_TransEn, fewshot_example_path_TransEn, Translate_EN_template),
]
|
||||||
|
|
||||||
charm_rea_datasets = []
|
charm_rea_datasets = []
|
||||||
|
@ -27,6 +27,26 @@ with read_base():
|
|||||||
|
|
||||||
# from .models.hf_llama.hf_llama3_8b_instruct import models as llama3_8b_instruct_model
|
# from .models.hf_llama.hf_llama3_8b_instruct import models as llama3_8b_instruct_model
|
||||||
# from .models.hf_llama.hf_llama3_70b_instruct import models as llama3_70b_instruct_model
|
# from .models.hf_llama.hf_llama3_70b_instruct import models as llama3_70b_instruct_model
|
||||||
|
from .summarizers.charm_rea import summarizer
|
||||||
|
|
||||||
models = sum([v for k, v in locals().items() if k.endswith('_model')], [])
|
models = sum([v for k, v in locals().items() if k.endswith('_model')], [])
|
||||||
work_dir = './outputs/CHARM/chat/'
|
work_dir = './outputs/CHARM/chat/'
|
||||||
|
|
||||||
|
# dataset version metric mode internlm2-chat-7b-turbomind
|
||||||
|
# ------------------------------------------------------------- --------- ------------- ------ -----------------------------
|
||||||
|
# charm-rea-Direct - naive_average gen 49.51
|
||||||
|
# charm-rea-ZH-CoT - naive_average gen 61.33
|
||||||
|
# charm-rea-EN-CoT - naive_average gen 54.55
|
||||||
|
# charm-rea-XLT - naive_average gen 58.46
|
||||||
|
# charm-rea-Translate-EN - naive_average gen 56.15
|
||||||
|
# - - - -
|
||||||
|
# charm-rea-Chinese_Direct - naive_average gen 47.14
|
||||||
|
# charm-rea-Chinese_ZH-CoT - naive_average gen 58.40
|
||||||
|
# charm-rea-Chinese_EN-CoT - naive_average gen 48.31
|
||||||
|
# charm-rea-Chinese_XLT - naive_average gen 53.57
|
||||||
|
# charm-rea-Chinese_Translate-EN - naive_average gen 48.21
|
||||||
|
# charm-rea-Global_Direct - naive_average gen 51.88
|
||||||
|
# charm-rea-Global_ZH-CoT - naive_average gen 64.26
|
||||||
|
# charm-rea-Global_EN-CoT - naive_average gen 60.79
|
||||||
|
# charm-rea-Global_XLT - naive_average gen 63.36
|
||||||
|
# charm-rea-Global_Translate-EN - naive_average gen 64.10
|
||||||
|
98
configs/summarizers/charm_rea.py
Normal file
98
configs/summarizers/charm_rea.py
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
from mmengine.config import read_base
|
||||||
|
|
||||||
|
with read_base():
|
||||||
|
from .groups.charm_rea import charm_rea_summary_groups
|
||||||
|
|
||||||
|
# Summarizer config for the CHARM reasoning (charm-rea) results table.
# `dataset_abbrs` fixes which rows are reported and in what order;
# `summary_groups` collects every variable in this config's namespace whose
# name ends with `_summary_groups` (e.g. `charm_rea_summary_groups`).
summarizer = dict(
    dataset_abbrs=[
        # Per-prompt groups (each combines the Chinese and Global groups).
        'charm-rea-Direct',
        'charm-rea-ZH-CoT',
        'charm-rea-EN-CoT',
        'charm-rea-XLT',
        'charm-rea-Translate-EN',
        # NOTE(review): an empty abbr appears to render as a separator row in
        # the summary table -- confirm against the summarizer implementation.
        '',
        # Per-region, per-prompt groups.
        'charm-rea-Chinese_Direct',
        'charm-rea-Chinese_ZH-CoT',
        'charm-rea-Chinese_EN-CoT',
        'charm-rea-Chinese_XLT',
        'charm-rea-Chinese_Translate-EN',
        'charm-rea-Global_Direct',
        'charm-rea-Global_ZH-CoT',
        'charm-rea-Global_EN-CoT',
        'charm-rea-Global_XLT',
        'charm-rea-Global_Translate-EN',
        '',
        # Individual tasks: Direct.
        'charm-rea-Chinese_Anachronisms_Judgment_Direct',
        'charm-rea-Chinese_Movie_and_Music_Recommendation_Direct',
        'charm-rea-Chinese_Natural_Language_Inference_Direct',
        'charm-rea-Chinese_Reading_Comprehension_Direct',
        'charm-rea-Chinese_Sequence_Understanding_Direct',
        'charm-rea-Chinese_Sport_Understanding_Direct',
        'charm-rea-Chinese_Time_Understanding_Direct',
        'charm-rea-Global_Anachronisms_Judgment_Direct',
        'charm-rea-Global_Movie_and_Music_Recommendation_Direct',
        'charm-rea-Global_Natural_Language_Inference_Direct',
        'charm-rea-Global_Reading_Comprehension_Direct',
        'charm-rea-Global_Sequence_Understanding_Direct',
        'charm-rea-Global_Sport_Understanding_Direct',
        'charm-rea-Global_Time_Understanding_Direct',
        # Individual tasks: ZH-CoT.
        'charm-rea-Chinese_Anachronisms_Judgment_ZH-CoT',
        'charm-rea-Chinese_Movie_and_Music_Recommendation_ZH-CoT',
        'charm-rea-Chinese_Natural_Language_Inference_ZH-CoT',
        'charm-rea-Chinese_Reading_Comprehension_ZH-CoT',
        'charm-rea-Chinese_Sequence_Understanding_ZH-CoT',
        'charm-rea-Chinese_Sport_Understanding_ZH-CoT',
        'charm-rea-Chinese_Time_Understanding_ZH-CoT',
        'charm-rea-Global_Anachronisms_Judgment_ZH-CoT',
        'charm-rea-Global_Movie_and_Music_Recommendation_ZH-CoT',
        'charm-rea-Global_Natural_Language_Inference_ZH-CoT',
        'charm-rea-Global_Reading_Comprehension_ZH-CoT',
        'charm-rea-Global_Sequence_Understanding_ZH-CoT',
        'charm-rea-Global_Sport_Understanding_ZH-CoT',
        'charm-rea-Global_Time_Understanding_ZH-CoT',
        # Individual tasks: EN-CoT.
        'charm-rea-Chinese_Anachronisms_Judgment_EN-CoT',
        'charm-rea-Chinese_Movie_and_Music_Recommendation_EN-CoT',
        'charm-rea-Chinese_Natural_Language_Inference_EN-CoT',
        'charm-rea-Chinese_Reading_Comprehension_EN-CoT',
        'charm-rea-Chinese_Sequence_Understanding_EN-CoT',
        'charm-rea-Chinese_Sport_Understanding_EN-CoT',
        'charm-rea-Chinese_Time_Understanding_EN-CoT',
        'charm-rea-Global_Anachronisms_Judgment_EN-CoT',
        'charm-rea-Global_Movie_and_Music_Recommendation_EN-CoT',
        'charm-rea-Global_Natural_Language_Inference_EN-CoT',
        'charm-rea-Global_Reading_Comprehension_EN-CoT',
        'charm-rea-Global_Sequence_Understanding_EN-CoT',
        'charm-rea-Global_Sport_Understanding_EN-CoT',
        'charm-rea-Global_Time_Understanding_EN-CoT',
        # Individual tasks: XLT.
        'charm-rea-Chinese_Anachronisms_Judgment_XLT',
        'charm-rea-Chinese_Movie_and_Music_Recommendation_XLT',
        'charm-rea-Chinese_Natural_Language_Inference_XLT',
        'charm-rea-Chinese_Reading_Comprehension_XLT',
        'charm-rea-Chinese_Sequence_Understanding_XLT',
        'charm-rea-Chinese_Sport_Understanding_XLT',
        'charm-rea-Chinese_Time_Understanding_XLT',
        'charm-rea-Global_Anachronisms_Judgment_XLT',
        'charm-rea-Global_Movie_and_Music_Recommendation_XLT',
        'charm-rea-Global_Natural_Language_Inference_XLT',
        'charm-rea-Global_Reading_Comprehension_XLT',
        'charm-rea-Global_Sequence_Understanding_XLT',
        'charm-rea-Global_Sport_Understanding_XLT',
        'charm-rea-Global_Time_Understanding_XLT',
        # Individual tasks: Translate-EN.
        'charm-rea-Chinese_Anachronisms_Judgment_Translate-EN',
        'charm-rea-Chinese_Movie_and_Music_Recommendation_Translate-EN',
        'charm-rea-Chinese_Natural_Language_Inference_Translate-EN',
        'charm-rea-Chinese_Reading_Comprehension_Translate-EN',
        'charm-rea-Chinese_Sequence_Understanding_Translate-EN',
        'charm-rea-Chinese_Sport_Understanding_Translate-EN',
        'charm-rea-Chinese_Time_Understanding_Translate-EN',
        'charm-rea-Global_Anachronisms_Judgment_Translate-EN',
        'charm-rea-Global_Movie_and_Music_Recommendation_Translate-EN',
        'charm-rea-Global_Natural_Language_Inference_Translate-EN',
        'charm-rea-Global_Reading_Comprehension_Translate-EN',
        'charm-rea-Global_Sequence_Understanding_Translate-EN',
        'charm-rea-Global_Sport_Understanding_Translate-EN',
        'charm-rea-Global_Time_Understanding_Translate-EN',
    ],
    # Concatenate every `*_summary_groups` list visible in this namespace.
    summary_groups=sum(
        [v for k, v in locals().items() if k.endswith('_summary_groups')], [])
)
|
30
configs/summarizers/groups/charm_rea.py
Normal file
30
configs/summarizers/groups/charm_rea.py
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
# Building blocks of the CHARM reasoning (charm-rea) dataset abbreviations,
# which have the form 'charm-rea-<region>_<task>_<prompt>'.
charm_tasks = [
    'Anachronisms_Judgment',
    'Movie_and_Music_Recommendation',
    'Natural_Language_Inference',
    'Reading_Comprehension',
    'Sequence_Understanding',
    'Sport_Understanding',
    'Time_Understanding',
]
regions = [
    'Chinese',
    'Global',
]
prompts = [
    'Direct',
    'ZH-CoT',
    'EN-CoT',
    'XLT',
    'Translate-EN',
]


charm_rea_summary_groups = []

# First level: one group per (prompt, region) pair, named
# 'charm-rea-<region>_<prompt>', whose subsets are all tasks of that region
# under that prompt.
for prompt in prompts:
    for region in regions:
        subsets = [
            f'charm-rea-{region}_{task}_{prompt}' for task in charm_tasks
        ]
        charm_rea_summary_groups.append({
            'name': f'charm-rea-{region}_{prompt}',
            'subsets': subsets,
        })

# Second level: one group per prompt, named 'charm-rea-<prompt>', whose
# subsets are the per-region groups built above.
for prompt in prompts:
    subsets = [f'charm-rea-{region}_{prompt}' for region in regions]
    charm_rea_summary_groups.append({
        'name': f'charm-rea-{prompt}',
        'subsets': subsets,
    })
|
Loading…
Reference in New Issue
Block a user