From 9b9855a00816576b191d05e35ea45d9bb4cff7d5 Mon Sep 17 00:00:00 2001
From: Xu Song
Date: Fri, 26 Jul 2024 11:50:41 +0800
Subject: [PATCH] Add `en` and `zh` groups to longbench summarizer; Fix longbench overall score (#1216)

* Add longbench groups

* update

* update
---
 configs/summarizers/example.py          |  1 +
 configs/summarizers/groups/longbench.py | 16 ++++++++++++++--
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/configs/summarizers/example.py b/configs/summarizers/example.py
index 3d51648b..937acfba 100644
--- a/configs/summarizers/example.py
+++ b/configs/summarizers/example.py
@@ -12,6 +12,7 @@ with read_base():
     from .groups.xiezhi import xiezhi_summary_groups
     from .groups.scibench import scibench_summary_groups
     from .groups.mgsm import mgsm_summary_groups
+    from .groups.longbench import longbench_summary_groups
 
 summarizer = dict(
     summary_groups=sum([v for k, v in locals().items() if k.endswith('_summary_groups')], []),
diff --git a/configs/summarizers/groups/longbench.py b/configs/summarizers/groups/longbench.py
index 35930114..c5a129f5 100644
--- a/configs/summarizers/groups/longbench.py
+++ b/configs/summarizers/groups/longbench.py
@@ -5,6 +5,18 @@ longbench_summary_groups = [
     {'name': 'longbench_few-shot-learning', 'subsets': ['LongBench_trec', 'LongBench_triviaqa', 'LongBench_samsum', 'LongBench_lsht']},
     {'name': 'longbench_synthetic-tasks', 'subsets': ['LongBench_passage_count', 'LongBench_passage_retrieval_en', 'LongBench_passage_retrieval_zh']},
     {'name': 'longbench_code-completion', 'subsets': ['LongBench_lcc', 'LongBench_repobench-p']},
-    {'name': 'longbench_code-completion', 'subsets': ['LongBench_lcc', 'LongBench_repobench-p']},
-    {'name': 'longbench', 'subsets': ['longbench_single-document-qa', 'longbench_multi-document-qa', 'longbench_summarization', 'longbench_few-shot-learning', 'longbench_synthetic-tasks', 'longbench_code-completion', 'longbench_code-completion']},
+
+    # code tasks are included in both longbench_zh and longbench_en
+    {'name': 'longbench_zh', 'subsets': ['LongBench_multifieldqa_zh', 'LongBench_dureader', 'LongBench_vcsum',
+                                         'LongBench_lsht', 'LongBench_passage_retrieval_zh',
+                                         'LongBench_lcc', 'LongBench_repobench-p']},
+    {'name': 'longbench_en', 'subsets': [
+        'LongBench_narrativeqa', 'LongBench_qasper', 'LongBench_multifieldqa_en',
+        'LongBench_hotpotqa', 'LongBench_2wikimqa', 'LongBench_musique',
+        'LongBench_gov_report', 'LongBench_qmsum', 'LongBench_multi_news',
+        'LongBench_trec', 'LongBench_triviaqa', 'LongBench_samsum',
+        'LongBench_passage_count', 'LongBench_passage_retrieval_en',
+        'LongBench_lcc', 'LongBench_repobench-p'
+    ]},
+    {'name': 'longbench', 'subsets': ['longbench_single-document-qa', 'longbench_multi-document-qa', 'longbench_summarization', 'longbench_few-shot-learning', 'longbench_synthetic-tasks', 'longbench_code-completion']},
 ]