Add en and zh groups to longbench summarizer; Fix longbench overall score (#1216)

* Add longbench groups

* update

* update
This commit is contained in:
Xu Song 2024-07-26 11:50:41 +08:00 committed by GitHub
parent edd0ffdf70
commit 9b9855a008
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 15 additions and 2 deletions

View File

@@ -12,6 +12,7 @@ with read_base():
from .groups.xiezhi import xiezhi_summary_groups
from .groups.scibench import scibench_summary_groups
from .groups.mgsm import mgsm_summary_groups
from .groups.longbench import longbench_summary_groups
summarizer = dict(
summary_groups=sum([v for k, v in locals().items() if k.endswith('_summary_groups')], []),

View File

@@ -5,6 +5,18 @@ longbench_summary_groups = [
{'name': 'longbench_few-shot-learning', 'subsets': ['LongBench_trec', 'LongBench_triviaqa', 'LongBench_samsum', 'LongBench_lsht']},
{'name': 'longbench_synthetic-tasks', 'subsets': ['LongBench_passage_count', 'LongBench_passage_retrieval_en', 'LongBench_passage_retrieval_zh']},
{'name': 'longbench_code-completion', 'subsets': ['LongBench_lcc', 'LongBench_repobench-p']},
{'name': 'longbench_code-completion', 'subsets': ['LongBench_lcc', 'LongBench_repobench-p']},
{'name': 'longbench', 'subsets': ['longbench_single-document-qa', 'longbench_multi-document-qa', 'longbench_summarization', 'longbench_few-shot-learning', 'longbench_synthetic-tasks', 'longbench_code-completion', 'longbench_code-completion']},
# code tasks are included in both longbench_zh and longbench_en
{'name': 'longbench_zh', 'subsets': ['LongBench_multifieldqa_zh', 'LongBench_dureader', 'LongBench_vcsum',
'LongBench_lsht', 'LongBench_passage_retrieval_zh',
'LongBench_lcc', 'LongBench_repobench-p']},
{'name': 'longbench_en', 'subsets': [
'LongBench_narrativeqa', 'LongBench_qasper', 'LongBench_multifieldqa_en',
'LongBench_hotpotqa', 'LongBench_2wikimqa', 'LongBench_musique',
'LongBench_gov_report', 'LongBench_qmsum', 'LongBench_multi_news',
'LongBench_trec', 'LongBench_triviaqa', 'LongBench_samsum',
'LongBench_passage_count', 'LongBench_passage_retrieval_en',
'LongBench_lcc', 'LongBench_repobench-p'
]},
{'name': 'longbench', 'subsets': ['longbench_single-document-qa', 'longbench_multi-document-qa', 'longbench_summarization', 'longbench_few-shot-learning', 'longbench_synthetic-tasks', 'longbench_code-completion']},
]