From 87ffa71d68c9084ac284a14980c90355c838b417 Mon Sep 17 00:00:00 2001 From: Linchen Xiao Date: Fri, 6 Sep 2024 15:50:12 +0800 Subject: [PATCH] [Feature] Longbench dataset update --- .../longbench_2wikimqa_gen_6b3efc.py | 21 +++++++++------ .../longbench_dureader_gen_c6c7e4.py | 21 +++++++++------ .../longbench_gov_report_gen_54c5b0.py | 21 +++++++++------ .../longbench_hotpotqa_gen_6b3efc.py | 21 +++++++++------ .../longbenchlcc/longbench_lcc_gen_6ba507.py | 21 +++++++++------ .../longbench_lsht_gen_e8a339.py | 24 ++++++++++++----- .../longbench_multi_news_gen_6f9da9.py | 21 +++++++++------ .../longbench_multifieldqa_en_gen_d3838e.py | 21 +++++++++------ .../longbench_multifieldqa_zh_gen_e9a7ef.py | 21 +++++++++------ .../longbench_musique_gen_6b3efc.py | 21 +++++++++------ .../longbench_narrativeqa_gen_a68305.py | 21 +++++++++------ .../longbench_passage_count_gen_dcdaab.py | 21 +++++++++------ ...ngbench_passage_retrieval_en_gen_734db5.py | 26 ++++++++++++------- ...ngbench_passage_retrieval_zh_gen_01cca2.py | 26 ++++++++++++------- .../longbench_qasper_gen_6b3efc.py | 21 +++++++++------ .../longbench_qmsum_gen_d33331.py | 21 +++++++++------ .../longbench_repobench_gen_6df953.py | 21 +++++++++------ .../longbench_samsum_gen_f4416d.py | 24 ++++++++++++----- .../longbench_trec_gen_824187.py | 24 ++++++++++++----- .../longbench_triviaqa_gen_d30cb9.py | 24 ++++++++++++----- .../longbench_vcsum_gen_f7a8ac.py | 21 +++++++++------ .../longbench_2wikimqa_gen_6b3efc.py | 21 +++++++++------ .../longbench_dureader_gen_c6c7e4.py | 21 +++++++++------ .../longbench_gov_report_gen_54c5b0.py | 21 +++++++++------ .../longbench_hotpotqa_gen_6b3efc.py | 21 +++++++++------ .../longbenchlcc/longbench_lcc_gen_6ba507.py | 21 +++++++++------ .../longbench_lsht_gen_e8a339.py | 24 ++++++++++++----- .../longbench_multi_news_gen_6f9da9.py | 21 +++++++++------ .../longbench_multifieldqa_en_gen_d3838e.py | 21 +++++++++------ .../longbench_multifieldqa_zh_gen_e9a7ef.py | 21 +++++++++------ .../longbench_musique_gen_6b3efc.py | 21 +++++++++------ .../longbench_narrativeqa_gen_a68305.py | 21 +++++++++------ .../longbench_passage_count_gen_dcdaab.py | 21 +++++++++------ ...ngbench_passage_retrieval_en_gen_734db5.py | 26 ++++++++++++------- ...ngbench_passage_retrieval_zh_gen_01cca2.py | 26 ++++++++++++------- .../longbench_qasper_gen_6b3efc.py | 21 +++++++++------ .../longbench_qmsum_gen_d33331.py | 21 +++++++++------ .../longbench_repobench_gen_6df953.py | 21 +++++++++------ .../longbench_samsum_gen_f4416d.py | 24 ++++++++++++----- .../longbench_trec_gen_824187.py | 24 ++++++++++++----- .../longbench_triviaqa_gen_d30cb9.py | 24 ++++++++++++----- .../longbench_vcsum_gen_f7a8ac.py | 21 +++++++++------ .../datasets/longbench/longbench_2wikim_qa.py | 11 ++++---- .../datasets/longbench/longbench_dureader.py | 11 ++++---- .../longbench/longbench_gov_report.py | 11 ++++---- .../datasets/longbench/longbench_hotpot_qa.py | 11 ++++---- .../datasets/longbench/longbench_lcc.py | 11 ++++---- .../datasets/longbench/longbench_lsht.py | 11 ++++---- .../longbench/longbench_multi_news.py | 11 ++++---- .../longbench/longbench_multifieldqa_en.py | 11 ++++---- .../longbench/longbench_multifieldqa_zh.py | 11 ++++---- .../datasets/longbench/longbench_musique.py | 11 ++++---- .../longbench/longbench_narrative_qa.py | 11 ++++---- .../longbench/longbench_passage_count.py | 11 ++++---- .../longbench_passage_retrieval_en.py | 11 ++++---- .../longbench_passage_retrieval_zh.py | 11 ++++---- .../datasets/longbench/longbench_qasper.py | 11 ++++---- .../datasets/longbench/longbench_qmsum.py | 11 ++++---- .../datasets/longbench/longbench_repobench.py | 11 ++++---- .../datasets/longbench/longbench_samsum.py | 11 ++++---- .../datasets/longbench/longbench_trec.py | 11 ++++---- .../datasets/longbench/longbench_trivia_qa.py | 11 ++++---- .../datasets/longbench/longbench_vcsum.py | 11 ++++---- opencompass/utils/datasets_info.py | 10 +++++++ 64 files changed, 730 insertions(+), 437 deletions(-) diff --git a/configs/datasets/longbench/longbench2wikimqa/longbench_2wikimqa_gen_6b3efc.py b/configs/datasets/longbench/longbench2wikimqa/longbench_2wikimqa_gen_6b3efc.py index 9faa9e72..befe5b7c 100644 --- a/configs/datasets/longbench/longbench2wikimqa/longbench_2wikimqa_gen_6b3efc.py +++ b/configs/datasets/longbench/longbench2wikimqa/longbench_2wikimqa_gen_6b3efc.py @@ -7,7 +7,7 @@ LongBench_2wikimqa_reader_cfg = dict( input_columns=['context', 'input'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_2wikimqa_infer_cfg = dict( @@ -15,24 +15,29 @@ LongBench_2wikimqa_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:'), - ], )), + dict( + role='HUMAN', + prompt='Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=32) + inferencer=dict(type=GenInferencer, max_out_len=32), ) LongBench_2wikimqa_eval_cfg = dict( - evaluator=dict(type=LongBenchF1Evaluator), - pred_role='BOT' + evaluator=dict(type=LongBenchF1Evaluator), pred_role='BOT' ) LongBench_2wikimqa_datasets = [ dict( type=LongBench2wikimqaDataset, abbr='LongBench_2wikimqa', - path='THUDM/LongBench', + path='opencompass/Longbench', name='2wikimqa', reader_cfg=LongBench_2wikimqa_reader_cfg, infer_cfg=LongBench_2wikimqa_infer_cfg, - eval_cfg=LongBench_2wikimqa_eval_cfg) + eval_cfg=LongBench_2wikimqa_eval_cfg, + ) ] diff --git a/configs/datasets/longbench/longbenchdureader/longbench_dureader_gen_c6c7e4.py b/configs/datasets/longbench/longbenchdureader/longbench_dureader_gen_c6c7e4.py index 70e8b0a3..37c8af44 100644 --- a/configs/datasets/longbench/longbenchdureader/longbench_dureader_gen_c6c7e4.py +++ b/configs/datasets/longbench/longbenchdureader/longbench_dureader_gen_c6c7e4.py @@ -7,7 +7,7 @@ LongBench_dureader_reader_cfg = dict( input_columns=['context', 'input'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_dureader_infer_cfg = dict( @@ -15,24 +15,29 @@ LongBench_dureader_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='请基于给定的文章回答下述问题。\n\n文章:{context}\n\n请基于上述文章回答下面的问题。\n\n问题:{input}\n回答:'), - ], )), + dict( + role='HUMAN', + prompt='请基于给定的文章回答下述问题。\n\n文章:{context}\n\n请基于上述文章回答下面的问题。\n\n问题:{input}\n回答:', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=128) + inferencer=dict(type=GenInferencer, max_out_len=128), ) LongBench_dureader_eval_cfg = dict( - evaluator=dict(type=LongBenchRougeEvaluator, language='zh'), - pred_role='BOT' + evaluator=dict(type=LongBenchRougeEvaluator, language='zh'), pred_role='BOT' ) LongBench_dureader_datasets = [ dict( type=LongBenchdureaderDataset, abbr='LongBench_dureader', - path='THUDM/LongBench', + path='opencompass/Longbench', name='dureader', reader_cfg=LongBench_dureader_reader_cfg, infer_cfg=LongBench_dureader_infer_cfg, - eval_cfg=LongBench_dureader_eval_cfg) + eval_cfg=LongBench_dureader_eval_cfg, + ) ] diff --git a/configs/datasets/longbench/longbenchgov_report/longbench_gov_report_gen_54c5b0.py b/configs/datasets/longbench/longbenchgov_report/longbench_gov_report_gen_54c5b0.py index 2af83b07..13696c2f 100644 --- a/configs/datasets/longbench/longbenchgov_report/longbench_gov_report_gen_54c5b0.py +++ b/configs/datasets/longbench/longbenchgov_report/longbench_gov_report_gen_54c5b0.py @@ -7,7 +7,7 @@ LongBench_gov_report_reader_cfg = dict( input_columns=['context'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_gov_report_infer_cfg = dict( @@ -15,24 +15,29 @@ LongBench_gov_report_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='You are given a report by a government agency. Write a one-page summary of the report.\n\nReport:\n{context}\n\nNow, write a one-page summary of the report.\n\nSummary:'), - ], )), + dict( + role='HUMAN', + prompt='You are given a report by a government agency. Write a one-page summary of the report.\n\nReport:\n{context}\n\nNow, write a one-page summary of the report.\n\nSummary:', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=512) + inferencer=dict(type=GenInferencer, max_out_len=512), ) LongBench_gov_report_eval_cfg = dict( - evaluator=dict(type=LongBenchRougeEvaluator), - pred_role='BOT' + evaluator=dict(type=LongBenchRougeEvaluator), pred_role='BOT' ) LongBench_gov_report_datasets = [ dict( type=LongBenchgov_reportDataset, abbr='LongBench_gov_report', - path='THUDM/LongBench', + path='opencompass/Longbench', name='gov_report', reader_cfg=LongBench_gov_report_reader_cfg, infer_cfg=LongBench_gov_report_infer_cfg, - eval_cfg=LongBench_gov_report_eval_cfg) + eval_cfg=LongBench_gov_report_eval_cfg, + ) ] diff --git a/configs/datasets/longbench/longbenchhotpotqa/longbench_hotpotqa_gen_6b3efc.py b/configs/datasets/longbench/longbenchhotpotqa/longbench_hotpotqa_gen_6b3efc.py index fd37de2b..a9ad31f3 100644 --- a/configs/datasets/longbench/longbenchhotpotqa/longbench_hotpotqa_gen_6b3efc.py +++ b/configs/datasets/longbench/longbenchhotpotqa/longbench_hotpotqa_gen_6b3efc.py @@ -7,7 +7,7 @@ LongBench_hotpotqa_reader_cfg = dict( input_columns=['context', 'input'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_hotpotqa_infer_cfg = dict( @@ -15,24 +15,29 @@ LongBench_hotpotqa_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:'), - ], )), + dict( + role='HUMAN', + prompt='Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=32) + inferencer=dict(type=GenInferencer, max_out_len=32), ) LongBench_hotpotqa_eval_cfg = dict( - evaluator=dict(type=LongBenchF1Evaluator), - pred_role='BOT' + evaluator=dict(type=LongBenchF1Evaluator), pred_role='BOT' ) LongBench_hotpotqa_datasets = [ dict( type=LongBenchhotpotqaDataset, abbr='LongBench_hotpotqa', - path='THUDM/LongBench', + path='opencompass/Longbench', name='hotpotqa', reader_cfg=LongBench_hotpotqa_reader_cfg, infer_cfg=LongBench_hotpotqa_infer_cfg, - eval_cfg=LongBench_hotpotqa_eval_cfg) + eval_cfg=LongBench_hotpotqa_eval_cfg, + ) ] diff --git a/configs/datasets/longbench/longbenchlcc/longbench_lcc_gen_6ba507.py b/configs/datasets/longbench/longbenchlcc/longbench_lcc_gen_6ba507.py index 99e1989c..add3efad 100644 --- a/configs/datasets/longbench/longbenchlcc/longbench_lcc_gen_6ba507.py +++ b/configs/datasets/longbench/longbenchlcc/longbench_lcc_gen_6ba507.py @@ -7,7 +7,7 @@ LongBench_lcc_reader_cfg = dict( input_columns=['context'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_lcc_infer_cfg = dict( @@ -15,24 +15,29 @@ LongBench_lcc_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='Please complete the code given below. \n{context}Next line of code:\n'), - ], )), + dict( + role='HUMAN', + prompt='Please complete the code given below. \n{context}Next line of code:\n', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=64) + inferencer=dict(type=GenInferencer, max_out_len=64), ) LongBench_lcc_eval_cfg = dict( - evaluator=dict(type=LongBenchCodeSimEvaluator), - pred_role='BOT' + evaluator=dict(type=LongBenchCodeSimEvaluator), pred_role='BOT' ) LongBench_lcc_datasets = [ dict( type=LongBenchlccDataset, abbr='LongBench_lcc', - path='THUDM/LongBench', + path='opencompass/Longbench', name='lcc', reader_cfg=LongBench_lcc_reader_cfg, infer_cfg=LongBench_lcc_infer_cfg, - eval_cfg=LongBench_lcc_eval_cfg) + eval_cfg=LongBench_lcc_eval_cfg, + ) ] diff --git a/configs/datasets/longbench/longbenchlsht/longbench_lsht_gen_e8a339.py b/configs/datasets/longbench/longbenchlsht/longbench_lsht_gen_e8a339.py index 9ebb82b3..cfb13c5c 100644 --- a/configs/datasets/longbench/longbenchlsht/longbench_lsht_gen_e8a339.py +++ b/configs/datasets/longbench/longbenchlsht/longbench_lsht_gen_e8a339.py @@ -1,13 +1,17 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_inferencer import GenInferencer -from opencompass.datasets import LongBenchClassificationEvaluator, LongBenchlshtDataset, lsht_postprocess +from opencompass.datasets import ( + LongBenchClassificationEvaluator, + LongBenchlshtDataset, + lsht_postprocess, +) LongBench_lsht_reader_cfg = dict( input_columns=['context', 'input'], output_column='all_labels', train_split='test', - test_split='test' + test_split='test', ) LongBench_lsht_infer_cfg = dict( @@ -15,10 +19,15 @@ LongBench_lsht_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='请判断给定新闻的类别,下面是一些例子。\n\n{context}\n{input}'), - ], )), + dict( + role='HUMAN', + prompt='请判断给定新闻的类别,下面是一些例子。\n\n{context}\n{input}', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=64) + inferencer=dict(type=GenInferencer, max_out_len=64), ) LongBench_lsht_eval_cfg = dict( @@ -31,9 +40,10 @@ LongBench_lsht_datasets = [ dict( type=LongBenchlshtDataset, abbr='LongBench_lsht', - path='THUDM/LongBench', + path='opencompass/Longbench', name='lsht', reader_cfg=LongBench_lsht_reader_cfg, infer_cfg=LongBench_lsht_infer_cfg, - eval_cfg=LongBench_lsht_eval_cfg) + eval_cfg=LongBench_lsht_eval_cfg, + ) ] diff --git a/configs/datasets/longbench/longbenchmulti_news/longbench_multi_news_gen_6f9da9.py b/configs/datasets/longbench/longbenchmulti_news/longbench_multi_news_gen_6f9da9.py index b4dd99b3..d89be47c 100644 --- a/configs/datasets/longbench/longbenchmulti_news/longbench_multi_news_gen_6f9da9.py +++ b/configs/datasets/longbench/longbenchmulti_news/longbench_multi_news_gen_6f9da9.py @@ -7,7 +7,7 @@ LongBench_multi_news_reader_cfg = dict( input_columns=['context'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_multi_news_infer_cfg = dict( @@ -15,24 +15,29 @@ LongBench_multi_news_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='You are given several news passages. Write a one-page summary of all news. \n\nNews:\n{context}\n\nNow, write a one-page summary of all the news.\n\nSummary:\n'), - ], )), + dict( + role='HUMAN', + prompt='You are given several news passages. Write a one-page summary of all news. \n\nNews:\n{context}\n\nNow, write a one-page summary of all the news.\n\nSummary:\n', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=512) + inferencer=dict(type=GenInferencer, max_out_len=512), ) LongBench_multi_news_eval_cfg = dict( - evaluator=dict(type=LongBenchRougeEvaluator), - pred_role='BOT' + evaluator=dict(type=LongBenchRougeEvaluator), pred_role='BOT' ) LongBench_multi_news_datasets = [ dict( type=LongBenchmulti_newsDataset, abbr='LongBench_multi_news', - path='THUDM/LongBench', + path='opencompass/Longbench', name='multi_news', reader_cfg=LongBench_multi_news_reader_cfg, infer_cfg=LongBench_multi_news_infer_cfg, - eval_cfg=LongBench_multi_news_eval_cfg) + eval_cfg=LongBench_multi_news_eval_cfg, + ) ] diff --git a/configs/datasets/longbench/longbenchmultifieldqa_en/longbench_multifieldqa_en_gen_d3838e.py b/configs/datasets/longbench/longbenchmultifieldqa_en/longbench_multifieldqa_en_gen_d3838e.py index cd92fb0c..a836fb52 100644 --- a/configs/datasets/longbench/longbenchmultifieldqa_en/longbench_multifieldqa_en_gen_d3838e.py +++ b/configs/datasets/longbench/longbenchmultifieldqa_en/longbench_multifieldqa_en_gen_d3838e.py @@ -7,7 +7,7 @@ LongBench_multifieldqa_en_reader_cfg = dict( input_columns=['context', 'input'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_multifieldqa_en_infer_cfg = dict( @@ -15,24 +15,29 @@ LongBench_multifieldqa_en_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='Read the following text and answer briefly.\n\n{context}\n\nNow, answer the following question based on the above text, only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:'), - ], )), + dict( + role='HUMAN', + prompt='Read the following text and answer briefly.\n\n{context}\n\nNow, answer the following question based on the above text, only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=64) + inferencer=dict(type=GenInferencer, max_out_len=64), ) LongBench_multifieldqa_en_eval_cfg = dict( - evaluator=dict(type=LongBenchF1Evaluator), - pred_role='BOT' + evaluator=dict(type=LongBenchF1Evaluator), pred_role='BOT' ) LongBench_multifieldqa_en_datasets = [ dict( type=LongBenchmultifieldqa_enDataset, abbr='LongBench_multifieldqa_en', - path='THUDM/LongBench', + path='opencompass/Longbench', name='multifieldqa_en', reader_cfg=LongBench_multifieldqa_en_reader_cfg, infer_cfg=LongBench_multifieldqa_en_infer_cfg, - eval_cfg=LongBench_multifieldqa_en_eval_cfg) + eval_cfg=LongBench_multifieldqa_en_eval_cfg, + ) ] diff --git a/configs/datasets/longbench/longbenchmultifieldqa_zh/longbench_multifieldqa_zh_gen_e9a7ef.py b/configs/datasets/longbench/longbenchmultifieldqa_zh/longbench_multifieldqa_zh_gen_e9a7ef.py index 64d932a8..49b9d22c 100644 --- a/configs/datasets/longbench/longbenchmultifieldqa_zh/longbench_multifieldqa_zh_gen_e9a7ef.py +++ b/configs/datasets/longbench/longbenchmultifieldqa_zh/longbench_multifieldqa_zh_gen_e9a7ef.py @@ -7,7 +7,7 @@ LongBench_multifieldqa_zh_reader_cfg = dict( input_columns=['context', 'input'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_multifieldqa_zh_infer_cfg = dict( @@ -15,24 +15,29 @@ LongBench_multifieldqa_zh_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='阅读以下文字并用中文简短回答:\n\n{context}\n\n现在请基于上面的文章回答下面的问题,只告诉我答案,不要输出任何其他字词。\n\n问题:{input}\n回答:'), - ], )), + dict( + role='HUMAN', + prompt='阅读以下文字并用中文简短回答:\n\n{context}\n\n现在请基于上面的文章回答下面的问题,只告诉我答案,不要输出任何其他字词。\n\n问题:{input}\n回答:', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=64) + inferencer=dict(type=GenInferencer, max_out_len=64), ) LongBench_multifieldqa_zh_eval_cfg = dict( - evaluator=dict(type=LongBenchF1Evaluator, language='zh'), - pred_role='BOT' + evaluator=dict(type=LongBenchF1Evaluator, language='zh'), pred_role='BOT' ) LongBench_multifieldqa_zh_datasets = [ dict( type=LongBenchmultifieldqa_zhDataset, abbr='LongBench_multifieldqa_zh', - path='THUDM/LongBench', + path='opencompass/Longbench', name='multifieldqa_zh', reader_cfg=LongBench_multifieldqa_zh_reader_cfg, infer_cfg=LongBench_multifieldqa_zh_infer_cfg, - eval_cfg=LongBench_multifieldqa_zh_eval_cfg) + eval_cfg=LongBench_multifieldqa_zh_eval_cfg, + ) ] diff --git a/configs/datasets/longbench/longbenchmusique/longbench_musique_gen_6b3efc.py b/configs/datasets/longbench/longbenchmusique/longbench_musique_gen_6b3efc.py index 714775fd..77e30795 100644 --- a/configs/datasets/longbench/longbenchmusique/longbench_musique_gen_6b3efc.py +++ b/configs/datasets/longbench/longbenchmusique/longbench_musique_gen_6b3efc.py @@ -7,7 +7,7 @@ LongBench_musique_reader_cfg = dict( input_columns=['context', 'input'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_musique_infer_cfg = dict( @@ -15,24 +15,29 @@ LongBench_musique_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:'), - ], )), + dict( + role='HUMAN', + prompt='Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=32) + inferencer=dict(type=GenInferencer, max_out_len=32), ) LongBench_musique_eval_cfg = dict( - evaluator=dict(type=LongBenchF1Evaluator), - pred_role='BOT' + evaluator=dict(type=LongBenchF1Evaluator), pred_role='BOT' ) LongBench_musique_datasets = [ dict( type=LongBenchmusiqueDataset, abbr='LongBench_musique', - path='THUDM/LongBench', + path='opencompass/Longbench', name='musique', reader_cfg=LongBench_musique_reader_cfg, infer_cfg=LongBench_musique_infer_cfg, - eval_cfg=LongBench_musique_eval_cfg) + eval_cfg=LongBench_musique_eval_cfg, + ) ] diff --git a/configs/datasets/longbench/longbenchnarrativeqa/longbench_narrativeqa_gen_a68305.py b/configs/datasets/longbench/longbenchnarrativeqa/longbench_narrativeqa_gen_a68305.py index 8910d4f2..48dccb6b 100644 --- a/configs/datasets/longbench/longbenchnarrativeqa/longbench_narrativeqa_gen_a68305.py +++ b/configs/datasets/longbench/longbenchnarrativeqa/longbench_narrativeqa_gen_a68305.py @@ -7,7 +7,7 @@ LongBench_narrativeqa_reader_cfg = dict( input_columns=['context', 'input'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_narrativeqa_infer_cfg = dict( @@ -15,24 +15,29 @@ LongBench_narrativeqa_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='You are given a story, which can be either a novel or a movie script, and a question. Answer the question as concisely as you can, using a single phrase if possible. Do not provide any explanation.\n\nStory: {context}\n\nNow, answer the question based on the story as concisely as you can, using a single phrase if possible. Do not provide any explanation.\n\nQuestion: {input}\n\nAnswer:'), - ], )), + dict( + role='HUMAN', + prompt='You are given a story, which can be either a novel or a movie script, and a question. Answer the question as concisely as you can, using a single phrase if possible. Do not provide any explanation.\n\nStory: {context}\n\nNow, answer the question based on the story as concisely as you can, using a single phrase if possible. Do not provide any explanation.\n\nQuestion: {input}\n\nAnswer:', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=128) + inferencer=dict(type=GenInferencer, max_out_len=128), ) LongBench_narrativeqa_eval_cfg = dict( - evaluator=dict(type=LongBenchF1Evaluator), - pred_role='BOT' + evaluator=dict(type=LongBenchF1Evaluator), pred_role='BOT' ) LongBench_narrativeqa_datasets = [ dict( type=LongBenchnarrativeqaDataset, abbr='LongBench_narrativeqa', - path='THUDM/LongBench', + path='opencompass/Longbench', name='narrativeqa', reader_cfg=LongBench_narrativeqa_reader_cfg, infer_cfg=LongBench_narrativeqa_infer_cfg, - eval_cfg=LongBench_narrativeqa_eval_cfg) + eval_cfg=LongBench_narrativeqa_eval_cfg, + ) ] diff --git a/configs/datasets/longbench/longbenchpassage_count/longbench_passage_count_gen_dcdaab.py b/configs/datasets/longbench/longbenchpassage_count/longbench_passage_count_gen_dcdaab.py index 4b7e5575..1bdb42e7 100644 --- a/configs/datasets/longbench/longbenchpassage_count/longbench_passage_count_gen_dcdaab.py +++ b/configs/datasets/longbench/longbenchpassage_count/longbench_passage_count_gen_dcdaab.py @@ -7,7 +7,7 @@ LongBench_passage_count_reader_cfg = dict( input_columns=['context', 'input'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_passage_count_infer_cfg = dict( @@ -15,24 +15,29 @@ LongBench_passage_count_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='There are some paragraphs below sourced from Wikipedia. Some of them may be duplicates. Please carefully read these paragraphs and determine how many unique paragraphs there are after removing duplicates. In other words, how many non-repeating paragraphs are there in total?\n\n{context}\n\nPlease enter the final count of unique paragraphs after removing duplicates. The output format should only contain the number, such as 1, 2, 3, and so on.\n\nThe final answer is: '), - ], )), + dict( + role='HUMAN', + prompt='There are some paragraphs below sourced from Wikipedia. Some of them may be duplicates. Please carefully read these paragraphs and determine how many unique paragraphs there are after removing duplicates. In other words, how many non-repeating paragraphs are there in total?\n\n{context}\n\nPlease enter the final count of unique paragraphs after removing duplicates. The output format should only contain the number, such as 1, 2, 3, and so on.\n\nThe final answer is: ', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=32) + inferencer=dict(type=GenInferencer, max_out_len=32), ) LongBench_passage_count_eval_cfg = dict( - evaluator=dict(type=LongBenchCountEvaluator), - pred_role='BOT' + evaluator=dict(type=LongBenchCountEvaluator), pred_role='BOT' ) LongBench_passage_count_datasets = [ dict( type=LongBenchpassage_countDataset, abbr='LongBench_passage_count', - path='THUDM/LongBench', + path='opencompass/Longbench', name='passage_count', reader_cfg=LongBench_passage_count_reader_cfg, infer_cfg=LongBench_passage_count_infer_cfg, - eval_cfg=LongBench_passage_count_eval_cfg) + eval_cfg=LongBench_passage_count_eval_cfg, + ) ] diff --git a/configs/datasets/longbench/longbenchpassage_retrieval_en/longbench_passage_retrieval_en_gen_734db5.py b/configs/datasets/longbench/longbenchpassage_retrieval_en/longbench_passage_retrieval_en_gen_734db5.py index b24f8b8c..4b5bfe62 100644 --- a/configs/datasets/longbench/longbenchpassage_retrieval_en/longbench_passage_retrieval_en_gen_734db5.py +++ b/configs/datasets/longbench/longbenchpassage_retrieval_en/longbench_passage_retrieval_en_gen_734db5.py @@ -1,13 +1,16 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_inferencer import GenInferencer -from opencompass.datasets import LongBenchRetrievalEvaluator, LongBenchpassage_retrieval_enDataset +from opencompass.datasets import ( + LongBenchRetrievalEvaluator, + LongBenchpassage_retrieval_enDataset, +) LongBench_passage_retrieval_en_reader_cfg = dict( input_columns=['context', 'input'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_passage_retrieval_en_infer_cfg = dict( @@ -15,24 +18,29 @@ LongBench_passage_retrieval_en_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='Here are 30 paragraphs from Wikipedia, along with an abstract. Please determine which paragraph the abstract is from.\n\n{context}\n\nThe following is an abstract.\n\n{input}\n\nPlease enter the number of the paragraph that the abstract is from. The answer format must be like \"Paragraph 1\", \"Paragraph 2\", etc.\n\nThe answer is: '), - ], )), + dict( + role='HUMAN', + prompt='Here are 30 paragraphs from Wikipedia, along with an abstract. Please determine which paragraph the abstract is from.\n\n{context}\n\nThe following is an abstract.\n\n{input}\n\nPlease enter the number of the paragraph that the abstract is from. The answer format must be like "Paragraph 1", "Paragraph 2", etc.\n\nThe answer is: ', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=32) + inferencer=dict(type=GenInferencer, max_out_len=32), ) LongBench_passage_retrieval_en_eval_cfg = dict( - evaluator=dict(type=LongBenchRetrievalEvaluator), - pred_role='BOT' + evaluator=dict(type=LongBenchRetrievalEvaluator), pred_role='BOT' ) LongBench_passage_retrieval_en_datasets = [ dict( type=LongBenchpassage_retrieval_enDataset, abbr='LongBench_passage_retrieval_en', - path='THUDM/LongBench', + path='opencompass/Longbench', name='passage_retrieval_en', reader_cfg=LongBench_passage_retrieval_en_reader_cfg, infer_cfg=LongBench_passage_retrieval_en_infer_cfg, - eval_cfg=LongBench_passage_retrieval_en_eval_cfg) + eval_cfg=LongBench_passage_retrieval_en_eval_cfg, + ) ] diff --git a/configs/datasets/longbench/longbenchpassage_retrieval_zh/longbench_passage_retrieval_zh_gen_01cca2.py b/configs/datasets/longbench/longbenchpassage_retrieval_zh/longbench_passage_retrieval_zh_gen_01cca2.py index 13f80517..8855986d 100644 --- a/configs/datasets/longbench/longbenchpassage_retrieval_zh/longbench_passage_retrieval_zh_gen_01cca2.py +++ b/configs/datasets/longbench/longbenchpassage_retrieval_zh/longbench_passage_retrieval_zh_gen_01cca2.py @@ -1,13 +1,16 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_inferencer import GenInferencer -from opencompass.datasets import LongBenchRetrievalEvaluator, LongBenchpassage_retrieval_zhDataset +from opencompass.datasets import ( + LongBenchRetrievalEvaluator, + LongBenchpassage_retrieval_zhDataset, +) LongBench_passage_retrieval_zh_reader_cfg = dict( input_columns=['context', 'input'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_passage_retrieval_zh_infer_cfg = dict( @@ -15,24 +18,29 @@ LongBench_passage_retrieval_zh_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='以下是若干段落文字,以及其中一个段落的摘要。请确定给定的摘要出自哪一段。\n\n{context}\n\n下面是一个摘要\n\n{input}\n\n请输入摘要所属段落的编号。答案格式必须是\"段落1\",\"段落2\"等格式\n\n答案是:'), - ], )), + dict( + role='HUMAN', + prompt='以下是若干段落文字,以及其中一个段落的摘要。请确定给定的摘要出自哪一段。\n\n{context}\n\n下面是一个摘要\n\n{input}\n\n请输入摘要所属段落的编号。答案格式必须是"段落1","段落2"等格式\n\n答案是:', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=32) + inferencer=dict(type=GenInferencer, max_out_len=32), ) LongBench_passage_retrieval_zh_eval_cfg = dict( - evaluator=dict(type=LongBenchRetrievalEvaluator, language='zh'), - pred_role='BOT' + evaluator=dict(type=LongBenchRetrievalEvaluator, language='zh'), pred_role='BOT' ) LongBench_passage_retrieval_zh_datasets = [ dict( type=LongBenchpassage_retrieval_zhDataset, abbr='LongBench_passage_retrieval_zh', - path='THUDM/LongBench', + path='opencompass/Longbench', name='passage_retrieval_zh', reader_cfg=LongBench_passage_retrieval_zh_reader_cfg, infer_cfg=LongBench_passage_retrieval_zh_infer_cfg, - eval_cfg=LongBench_passage_retrieval_zh_eval_cfg) + eval_cfg=LongBench_passage_retrieval_zh_eval_cfg, + ) ] diff --git a/configs/datasets/longbench/longbenchqasper/longbench_qasper_gen_6b3efc.py b/configs/datasets/longbench/longbenchqasper/longbench_qasper_gen_6b3efc.py index ea330bc9..d0090960 100644 --- a/configs/datasets/longbench/longbenchqasper/longbench_qasper_gen_6b3efc.py +++ b/configs/datasets/longbench/longbenchqasper/longbench_qasper_gen_6b3efc.py @@ -7,7 +7,7 @@ LongBench_qasper_reader_cfg = dict( input_columns=['context', 'input'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_qasper_infer_cfg = dict( @@ -15,24 +15,29 @@ LongBench_qasper_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:'), - ], )), + dict( + role='HUMAN', + prompt='Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=32) + inferencer=dict(type=GenInferencer, max_out_len=32), ) LongBench_qasper_eval_cfg = dict( - evaluator=dict(type=LongBenchF1Evaluator), - pred_role='BOT' + evaluator=dict(type=LongBenchF1Evaluator), pred_role='BOT' ) LongBench_qasper_datasets = [ dict( type=LongBenchqasperDataset, abbr='LongBench_qasper', - path='THUDM/LongBench', + path='opencompass/Longbench', name='qasper', reader_cfg=LongBench_qasper_reader_cfg, infer_cfg=LongBench_qasper_infer_cfg, - eval_cfg=LongBench_qasper_eval_cfg) + eval_cfg=LongBench_qasper_eval_cfg, + ) ] diff --git a/configs/datasets/longbench/longbenchqmsum/longbench_qmsum_gen_d33331.py b/configs/datasets/longbench/longbenchqmsum/longbench_qmsum_gen_d33331.py index 396a998f..682b380f 100644 --- a/configs/datasets/longbench/longbenchqmsum/longbench_qmsum_gen_d33331.py +++ b/configs/datasets/longbench/longbenchqmsum/longbench_qmsum_gen_d33331.py @@ -7,7 +7,7 @@ LongBench_qmsum_reader_cfg = dict( input_columns=['context', 'input'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_qmsum_infer_cfg = dict( @@ -15,24 +15,29 @@ LongBench_qmsum_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='You are given a meeting transcript and a query containing a question or instruction. Answer the query in one or more sentences.\n\nTranscript:\n{context}\n\nNow, answer the query based on the above meeting transcript in one or more sentences.\n\nQuery: {input}\nAnswer:'), - ], )), + dict( + role='HUMAN', + prompt='You are given a meeting transcript and a query containing a question or instruction. Answer the query in one or more sentences.\n\nTranscript:\n{context}\n\nNow, answer the query based on the above meeting transcript in one or more sentences.\n\nQuery: {input}\nAnswer:', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=512) + inferencer=dict(type=GenInferencer, max_out_len=512), ) LongBench_qmsum_eval_cfg = dict( - evaluator=dict(type=LongBenchRougeEvaluator), - pred_role='BOT' + evaluator=dict(type=LongBenchRougeEvaluator), pred_role='BOT' ) LongBench_qmsum_datasets = [ dict( type=LongBenchqmsumDataset, abbr='LongBench_qmsum', - path='THUDM/LongBench', + path='opencompass/Longbench', name='qmsum', reader_cfg=LongBench_qmsum_reader_cfg, infer_cfg=LongBench_qmsum_infer_cfg, - eval_cfg=LongBench_qmsum_eval_cfg) + eval_cfg=LongBench_qmsum_eval_cfg, + ) ] diff --git a/configs/datasets/longbench/longbenchrepobench/longbench_repobench_gen_6df953.py b/configs/datasets/longbench/longbenchrepobench/longbench_repobench_gen_6df953.py index e04b3d34..84f0fff8 100644 --- a/configs/datasets/longbench/longbenchrepobench/longbench_repobench_gen_6df953.py +++ b/configs/datasets/longbench/longbenchrepobench/longbench_repobench_gen_6df953.py @@ -7,7 +7,7 @@ LongBench_repobench_reader_cfg = dict( input_columns=['context', 'input'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_repobench_infer_cfg = dict( @@ -15,24 +15,29 @@ LongBench_repobench_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='Please complete the code given below. \n{context}{input}Next line of code:\n'), - ], )), + dict( + role='HUMAN', + prompt='Please complete the code given below. \n{context}{input}Next line of code:\n', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=64) + inferencer=dict(type=GenInferencer, max_out_len=64), ) LongBench_repobench_eval_cfg = dict( - evaluator=dict(type=LongBenchCodeSimEvaluator), - pred_role='BOT' + evaluator=dict(type=LongBenchCodeSimEvaluator), pred_role='BOT' ) LongBench_repobench_datasets = [ dict( type=LongBenchrepobenchDataset, abbr='LongBench_repobench-p', - path='THUDM/LongBench', + path='opencompass/Longbench', name='repobench-p', reader_cfg=LongBench_repobench_reader_cfg, infer_cfg=LongBench_repobench_infer_cfg, - eval_cfg=LongBench_repobench_eval_cfg) + eval_cfg=LongBench_repobench_eval_cfg, + ) ] diff --git a/configs/datasets/longbench/longbenchsamsum/longbench_samsum_gen_f4416d.py b/configs/datasets/longbench/longbenchsamsum/longbench_samsum_gen_f4416d.py index 51d2f74a..37f617b7 100644 --- a/configs/datasets/longbench/longbenchsamsum/longbench_samsum_gen_f4416d.py +++ b/configs/datasets/longbench/longbenchsamsum/longbench_samsum_gen_f4416d.py @@ -1,13 +1,17 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_inferencer import GenInferencer -from opencompass.datasets import LongBenchRougeEvaluator, LongBenchsamsumDataset, samsum_postprocess +from opencompass.datasets import ( + LongBenchRougeEvaluator, + LongBenchsamsumDataset, + samsum_postprocess, +) LongBench_samsum_reader_cfg = dict( input_columns=['context', 'input'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_samsum_infer_cfg = dict( @@ -15,10 +19,15 @@ LongBench_samsum_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='Summarize the dialogue into a few short sentences. The following are some examples.\n\n{context}\n\n{input}'), - ], )), + dict( + role='HUMAN', + prompt='Summarize the dialogue into a few short sentences. The following are some examples.\n\n{context}\n\n{input}', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=128) + inferencer=dict(type=GenInferencer, max_out_len=128), ) LongBench_samsum_eval_cfg = dict( @@ -31,9 +40,10 @@ LongBench_samsum_datasets = [ dict( type=LongBenchsamsumDataset, abbr='LongBench_samsum', - path='THUDM/LongBench', + path='opencompass/Longbench', name='samsum', reader_cfg=LongBench_samsum_reader_cfg, infer_cfg=LongBench_samsum_infer_cfg, - eval_cfg=LongBench_samsum_eval_cfg) + eval_cfg=LongBench_samsum_eval_cfg, + ) ] diff --git a/configs/datasets/longbench/longbenchtrec/longbench_trec_gen_824187.py b/configs/datasets/longbench/longbenchtrec/longbench_trec_gen_824187.py index 66719fb9..ca44b384 100644 --- a/configs/datasets/longbench/longbenchtrec/longbench_trec_gen_824187.py +++ b/configs/datasets/longbench/longbenchtrec/longbench_trec_gen_824187.py @@ -1,13 +1,17 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_inferencer import GenInferencer -from opencompass.datasets import LongBenchClassificationEvaluator, LongBenchtrecDataset, trec_postprocess +from opencompass.datasets import ( + LongBenchClassificationEvaluator, + LongBenchtrecDataset, + trec_postprocess, +) LongBench_trec_reader_cfg = dict( input_columns=['context', 'input'], output_column='all_labels', train_split='test', - test_split='test' + test_split='test', ) LongBench_trec_infer_cfg = dict( @@ -15,10 +19,15 @@ LongBench_trec_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='Please determine the type of the question below. Here are some examples of questions.\n\n{context}\n{input}'), - ], )), + dict( + role='HUMAN', + prompt='Please determine the type of the question below. Here are some examples of questions.\n\n{context}\n{input}', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=64) + inferencer=dict(type=GenInferencer, max_out_len=64), ) LongBench_trec_eval_cfg = dict( @@ -31,9 +40,10 @@ LongBench_trec_datasets = [ dict( type=LongBenchtrecDataset, abbr='LongBench_trec', - path='THUDM/LongBench', + path='opencompass/Longbench', name='trec', reader_cfg=LongBench_trec_reader_cfg, infer_cfg=LongBench_trec_infer_cfg, - eval_cfg=LongBench_trec_eval_cfg) + eval_cfg=LongBench_trec_eval_cfg, + ) ] diff --git a/configs/datasets/longbench/longbenchtriviaqa/longbench_triviaqa_gen_d30cb9.py b/configs/datasets/longbench/longbenchtriviaqa/longbench_triviaqa_gen_d30cb9.py index 2cfb7fc1..73b1a364 100644 --- a/configs/datasets/longbench/longbenchtriviaqa/longbench_triviaqa_gen_d30cb9.py +++ b/configs/datasets/longbench/longbenchtriviaqa/longbench_triviaqa_gen_d30cb9.py @@ -1,13 +1,17 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_inferencer import GenInferencer -from opencompass.datasets import LongBenchF1Evaluator, LongBenchtriviaqaDataset, triviaqa_postprocess +from opencompass.datasets import ( + LongBenchF1Evaluator, + LongBenchtriviaqaDataset, + triviaqa_postprocess, +) LongBench_triviaqa_reader_cfg = dict( input_columns=['context', 'input'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_triviaqa_infer_cfg = dict( @@ -15,10 +19,15 @@ LongBench_triviaqa_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='Answer the question based on the given passage. Only give me the answer and do not output any other words. The following are some examples.\n\n{context}\n\n{input}'), - ], )), + dict( + role='HUMAN', + prompt='Answer the question based on the given passage. Only give me the answer and do not output any other words. The following are some examples.\n\n{context}\n\n{input}', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=32) + inferencer=dict(type=GenInferencer, max_out_len=32), ) LongBench_triviaqa_eval_cfg = dict( @@ -31,9 +40,10 @@ LongBench_triviaqa_datasets = [ dict( type=LongBenchtriviaqaDataset, abbr='LongBench_triviaqa', - path='THUDM/LongBench', + path='opencompass/Longbench', name='triviaqa', reader_cfg=LongBench_triviaqa_reader_cfg, infer_cfg=LongBench_triviaqa_infer_cfg, - eval_cfg=LongBench_triviaqa_eval_cfg) + eval_cfg=LongBench_triviaqa_eval_cfg, + ) ] diff --git a/configs/datasets/longbench/longbenchvcsum/longbench_vcsum_gen_f7a8ac.py b/configs/datasets/longbench/longbenchvcsum/longbench_vcsum_gen_f7a8ac.py index 1264940c..8b10b917 100644 --- a/configs/datasets/longbench/longbenchvcsum/longbench_vcsum_gen_f7a8ac.py +++ b/configs/datasets/longbench/longbenchvcsum/longbench_vcsum_gen_f7a8ac.py @@ -7,7 +7,7 @@ LongBench_vcsum_reader_cfg = dict( input_columns=['context'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_vcsum_infer_cfg = dict( @@ -15,24 +15,29 @@ LongBench_vcsum_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='下面有一段会议记录,请你阅读后,写一段总结,总结会议的内容。\n会议记录:\n{context}\n\n会议总结:'), - ], )), + dict( + role='HUMAN', + prompt='下面有一段会议记录,请你阅读后,写一段总结,总结会议的内容。\n会议记录:\n{context}\n\n会议总结:', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=512) + inferencer=dict(type=GenInferencer, max_out_len=512), ) LongBench_vcsum_eval_cfg = dict( - evaluator=dict(type=LongBenchRougeEvaluator, language='zh'), - pred_role='BOT' + evaluator=dict(type=LongBenchRougeEvaluator, language='zh'), pred_role='BOT' ) LongBench_vcsum_datasets = [ dict( type=LongBenchvcsumDataset, abbr='LongBench_vcsum', - path='THUDM/LongBench', + path='opencompass/Longbench', name='vcsum', reader_cfg=LongBench_vcsum_reader_cfg, infer_cfg=LongBench_vcsum_infer_cfg, - eval_cfg=LongBench_vcsum_eval_cfg) + eval_cfg=LongBench_vcsum_eval_cfg, + ) ] diff --git a/opencompass/configs/datasets/longbench/longbench2wikimqa/longbench_2wikimqa_gen_6b3efc.py b/opencompass/configs/datasets/longbench/longbench2wikimqa/longbench_2wikimqa_gen_6b3efc.py index 9faa9e72..befe5b7c 100644 --- a/opencompass/configs/datasets/longbench/longbench2wikimqa/longbench_2wikimqa_gen_6b3efc.py +++ b/opencompass/configs/datasets/longbench/longbench2wikimqa/longbench_2wikimqa_gen_6b3efc.py @@ -7,7 +7,7 @@ LongBench_2wikimqa_reader_cfg = dict( input_columns=['context', 'input'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_2wikimqa_infer_cfg = dict( @@ -15,24 +15,29 @@ LongBench_2wikimqa_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:'), - ], )), + dict( + role='HUMAN', + prompt='Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=32) + inferencer=dict(type=GenInferencer, max_out_len=32), ) LongBench_2wikimqa_eval_cfg = dict( - evaluator=dict(type=LongBenchF1Evaluator), - pred_role='BOT' + evaluator=dict(type=LongBenchF1Evaluator), pred_role='BOT' ) LongBench_2wikimqa_datasets = [ dict( type=LongBench2wikimqaDataset, abbr='LongBench_2wikimqa', - path='THUDM/LongBench', + path='opencompass/Longbench', name='2wikimqa', reader_cfg=LongBench_2wikimqa_reader_cfg, infer_cfg=LongBench_2wikimqa_infer_cfg, - eval_cfg=LongBench_2wikimqa_eval_cfg) + eval_cfg=LongBench_2wikimqa_eval_cfg, + ) ] diff --git a/opencompass/configs/datasets/longbench/longbenchdureader/longbench_dureader_gen_c6c7e4.py b/opencompass/configs/datasets/longbench/longbenchdureader/longbench_dureader_gen_c6c7e4.py index 70e8b0a3..37c8af44 100644 --- a/opencompass/configs/datasets/longbench/longbenchdureader/longbench_dureader_gen_c6c7e4.py +++ b/opencompass/configs/datasets/longbench/longbenchdureader/longbench_dureader_gen_c6c7e4.py @@ -7,7 +7,7 @@ LongBench_dureader_reader_cfg = dict( input_columns=['context', 'input'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_dureader_infer_cfg = dict( @@ -15,24 +15,29 @@ LongBench_dureader_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='请基于给定的文章回答下述问题。\n\n文章:{context}\n\n请基于上述文章回答下面的问题。\n\n问题:{input}\n回答:'), - ], )), + dict( + role='HUMAN', + prompt='请基于给定的文章回答下述问题。\n\n文章:{context}\n\n请基于上述文章回答下面的问题。\n\n问题:{input}\n回答:', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=128) + inferencer=dict(type=GenInferencer, max_out_len=128), ) LongBench_dureader_eval_cfg = dict( - evaluator=dict(type=LongBenchRougeEvaluator, language='zh'), - pred_role='BOT' + evaluator=dict(type=LongBenchRougeEvaluator, language='zh'), pred_role='BOT' ) LongBench_dureader_datasets = [ dict( type=LongBenchdureaderDataset, abbr='LongBench_dureader', - path='THUDM/LongBench', + path='opencompass/Longbench', name='dureader', reader_cfg=LongBench_dureader_reader_cfg, infer_cfg=LongBench_dureader_infer_cfg, - eval_cfg=LongBench_dureader_eval_cfg) + eval_cfg=LongBench_dureader_eval_cfg, + ) ] diff --git a/opencompass/configs/datasets/longbench/longbenchgov_report/longbench_gov_report_gen_54c5b0.py b/opencompass/configs/datasets/longbench/longbenchgov_report/longbench_gov_report_gen_54c5b0.py index 2af83b07..13696c2f 100644 --- a/opencompass/configs/datasets/longbench/longbenchgov_report/longbench_gov_report_gen_54c5b0.py +++ b/opencompass/configs/datasets/longbench/longbenchgov_report/longbench_gov_report_gen_54c5b0.py @@ -7,7 +7,7 @@ LongBench_gov_report_reader_cfg = dict( input_columns=['context'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_gov_report_infer_cfg = dict( @@ -15,24 +15,29 @@ LongBench_gov_report_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='You are given a report by a government agency. Write a one-page summary of the report.\n\nReport:\n{context}\n\nNow, write a one-page summary of the report.\n\nSummary:'), - ], )), + dict( + role='HUMAN', + prompt='You are given a report by a government agency. Write a one-page summary of the report.\n\nReport:\n{context}\n\nNow, write a one-page summary of the report.\n\nSummary:', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=512) + inferencer=dict(type=GenInferencer, max_out_len=512), ) LongBench_gov_report_eval_cfg = dict( - evaluator=dict(type=LongBenchRougeEvaluator), - pred_role='BOT' + evaluator=dict(type=LongBenchRougeEvaluator), pred_role='BOT' ) LongBench_gov_report_datasets = [ dict( type=LongBenchgov_reportDataset, abbr='LongBench_gov_report', - path='THUDM/LongBench', + path='opencompass/Longbench', name='gov_report', reader_cfg=LongBench_gov_report_reader_cfg, infer_cfg=LongBench_gov_report_infer_cfg, - eval_cfg=LongBench_gov_report_eval_cfg) + eval_cfg=LongBench_gov_report_eval_cfg, + ) ] diff --git a/opencompass/configs/datasets/longbench/longbenchhotpotqa/longbench_hotpotqa_gen_6b3efc.py b/opencompass/configs/datasets/longbench/longbenchhotpotqa/longbench_hotpotqa_gen_6b3efc.py index fd37de2b..a9ad31f3 100644 --- a/opencompass/configs/datasets/longbench/longbenchhotpotqa/longbench_hotpotqa_gen_6b3efc.py +++ b/opencompass/configs/datasets/longbench/longbenchhotpotqa/longbench_hotpotqa_gen_6b3efc.py @@ -7,7 +7,7 @@ LongBench_hotpotqa_reader_cfg = dict( input_columns=['context', 'input'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_hotpotqa_infer_cfg = dict( @@ -15,24 +15,29 @@ LongBench_hotpotqa_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:'), - ], )), + dict( + role='HUMAN', + prompt='Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=32) + inferencer=dict(type=GenInferencer, max_out_len=32), ) LongBench_hotpotqa_eval_cfg = dict( - evaluator=dict(type=LongBenchF1Evaluator), - pred_role='BOT' + evaluator=dict(type=LongBenchF1Evaluator), pred_role='BOT' ) LongBench_hotpotqa_datasets = [ dict( type=LongBenchhotpotqaDataset, abbr='LongBench_hotpotqa', - path='THUDM/LongBench', + path='opencompass/Longbench', name='hotpotqa', reader_cfg=LongBench_hotpotqa_reader_cfg, infer_cfg=LongBench_hotpotqa_infer_cfg, - eval_cfg=LongBench_hotpotqa_eval_cfg) + eval_cfg=LongBench_hotpotqa_eval_cfg, + ) ] diff --git a/opencompass/configs/datasets/longbench/longbenchlcc/longbench_lcc_gen_6ba507.py b/opencompass/configs/datasets/longbench/longbenchlcc/longbench_lcc_gen_6ba507.py index 99e1989c..add3efad 100644 --- a/opencompass/configs/datasets/longbench/longbenchlcc/longbench_lcc_gen_6ba507.py +++ b/opencompass/configs/datasets/longbench/longbenchlcc/longbench_lcc_gen_6ba507.py @@ -7,7 +7,7 @@ LongBench_lcc_reader_cfg = dict( input_columns=['context'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_lcc_infer_cfg = dict( @@ -15,24 +15,29 @@ LongBench_lcc_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='Please complete the code given below. \n{context}Next line of code:\n'), - ], )), + dict( + role='HUMAN', + prompt='Please complete the code given below. \n{context}Next line of code:\n', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=64) + inferencer=dict(type=GenInferencer, max_out_len=64), ) LongBench_lcc_eval_cfg = dict( - evaluator=dict(type=LongBenchCodeSimEvaluator), - pred_role='BOT' + evaluator=dict(type=LongBenchCodeSimEvaluator), pred_role='BOT' ) LongBench_lcc_datasets = [ dict( type=LongBenchlccDataset, abbr='LongBench_lcc', - path='THUDM/LongBench', + path='opencompass/Longbench', name='lcc', reader_cfg=LongBench_lcc_reader_cfg, infer_cfg=LongBench_lcc_infer_cfg, - eval_cfg=LongBench_lcc_eval_cfg) + eval_cfg=LongBench_lcc_eval_cfg, + ) ] diff --git a/opencompass/configs/datasets/longbench/longbenchlsht/longbench_lsht_gen_e8a339.py b/opencompass/configs/datasets/longbench/longbenchlsht/longbench_lsht_gen_e8a339.py index 9ebb82b3..cfb13c5c 100644 --- a/opencompass/configs/datasets/longbench/longbenchlsht/longbench_lsht_gen_e8a339.py +++ b/opencompass/configs/datasets/longbench/longbenchlsht/longbench_lsht_gen_e8a339.py @@ -1,13 +1,17 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_inferencer import GenInferencer -from opencompass.datasets import LongBenchClassificationEvaluator, LongBenchlshtDataset, lsht_postprocess +from opencompass.datasets import ( + LongBenchClassificationEvaluator, + LongBenchlshtDataset, + lsht_postprocess, +) LongBench_lsht_reader_cfg = dict( input_columns=['context', 'input'], output_column='all_labels', train_split='test', - test_split='test' + test_split='test', ) LongBench_lsht_infer_cfg = dict( @@ -15,10 +19,15 @@ LongBench_lsht_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='请判断给定新闻的类别,下面是一些例子。\n\n{context}\n{input}'), - ], )), + dict( + role='HUMAN', + prompt='请判断给定新闻的类别,下面是一些例子。\n\n{context}\n{input}', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=64) + inferencer=dict(type=GenInferencer, max_out_len=64), ) LongBench_lsht_eval_cfg = dict( @@ -31,9 +40,10 @@ LongBench_lsht_datasets = [ dict( type=LongBenchlshtDataset, abbr='LongBench_lsht', - path='THUDM/LongBench', + path='opencompass/Longbench', name='lsht', reader_cfg=LongBench_lsht_reader_cfg, infer_cfg=LongBench_lsht_infer_cfg, - eval_cfg=LongBench_lsht_eval_cfg) + eval_cfg=LongBench_lsht_eval_cfg, + ) ] diff --git a/opencompass/configs/datasets/longbench/longbenchmulti_news/longbench_multi_news_gen_6f9da9.py b/opencompass/configs/datasets/longbench/longbenchmulti_news/longbench_multi_news_gen_6f9da9.py index b4dd99b3..d89be47c 100644 --- a/opencompass/configs/datasets/longbench/longbenchmulti_news/longbench_multi_news_gen_6f9da9.py +++ b/opencompass/configs/datasets/longbench/longbenchmulti_news/longbench_multi_news_gen_6f9da9.py @@ -7,7 +7,7 @@ LongBench_multi_news_reader_cfg = dict( input_columns=['context'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_multi_news_infer_cfg = dict( @@ -15,24 +15,29 @@ LongBench_multi_news_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='You are given several news passages. Write a one-page summary of all news. \n\nNews:\n{context}\n\nNow, write a one-page summary of all the news.\n\nSummary:\n'), - ], )), + dict( + role='HUMAN', + prompt='You are given several news passages. Write a one-page summary of all news. \n\nNews:\n{context}\n\nNow, write a one-page summary of all the news.\n\nSummary:\n', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=512) + inferencer=dict(type=GenInferencer, max_out_len=512), ) LongBench_multi_news_eval_cfg = dict( - evaluator=dict(type=LongBenchRougeEvaluator), - pred_role='BOT' + evaluator=dict(type=LongBenchRougeEvaluator), pred_role='BOT' ) LongBench_multi_news_datasets = [ dict( type=LongBenchmulti_newsDataset, abbr='LongBench_multi_news', - path='THUDM/LongBench', + path='opencompass/Longbench', name='multi_news', reader_cfg=LongBench_multi_news_reader_cfg, infer_cfg=LongBench_multi_news_infer_cfg, - eval_cfg=LongBench_multi_news_eval_cfg) + eval_cfg=LongBench_multi_news_eval_cfg, + ) ] diff --git a/opencompass/configs/datasets/longbench/longbenchmultifieldqa_en/longbench_multifieldqa_en_gen_d3838e.py b/opencompass/configs/datasets/longbench/longbenchmultifieldqa_en/longbench_multifieldqa_en_gen_d3838e.py index cd92fb0c..a836fb52 100644 --- a/opencompass/configs/datasets/longbench/longbenchmultifieldqa_en/longbench_multifieldqa_en_gen_d3838e.py +++ b/opencompass/configs/datasets/longbench/longbenchmultifieldqa_en/longbench_multifieldqa_en_gen_d3838e.py @@ -7,7 +7,7 @@ LongBench_multifieldqa_en_reader_cfg = dict( input_columns=['context', 'input'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_multifieldqa_en_infer_cfg = dict( @@ -15,24 +15,29 @@ LongBench_multifieldqa_en_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='Read the following text and answer briefly.\n\n{context}\n\nNow, answer the following question based on the above text, only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:'), - ], )), + dict( + role='HUMAN', + prompt='Read the following text and answer briefly.\n\n{context}\n\nNow, answer the following question based on the above text, only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=64) + inferencer=dict(type=GenInferencer, max_out_len=64), ) LongBench_multifieldqa_en_eval_cfg = dict( - evaluator=dict(type=LongBenchF1Evaluator), - pred_role='BOT' + evaluator=dict(type=LongBenchF1Evaluator), pred_role='BOT' ) LongBench_multifieldqa_en_datasets = [ dict( type=LongBenchmultifieldqa_enDataset, abbr='LongBench_multifieldqa_en', - path='THUDM/LongBench', + path='opencompass/Longbench', name='multifieldqa_en', reader_cfg=LongBench_multifieldqa_en_reader_cfg, infer_cfg=LongBench_multifieldqa_en_infer_cfg, - eval_cfg=LongBench_multifieldqa_en_eval_cfg) + eval_cfg=LongBench_multifieldqa_en_eval_cfg, + ) ] diff --git a/opencompass/configs/datasets/longbench/longbenchmultifieldqa_zh/longbench_multifieldqa_zh_gen_e9a7ef.py b/opencompass/configs/datasets/longbench/longbenchmultifieldqa_zh/longbench_multifieldqa_zh_gen_e9a7ef.py index 64d932a8..49b9d22c 100644 --- a/opencompass/configs/datasets/longbench/longbenchmultifieldqa_zh/longbench_multifieldqa_zh_gen_e9a7ef.py +++ b/opencompass/configs/datasets/longbench/longbenchmultifieldqa_zh/longbench_multifieldqa_zh_gen_e9a7ef.py @@ -7,7 +7,7 @@ LongBench_multifieldqa_zh_reader_cfg = dict( input_columns=['context', 'input'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_multifieldqa_zh_infer_cfg = dict( @@ -15,24 +15,29 @@ LongBench_multifieldqa_zh_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='阅读以下文字并用中文简短回答:\n\n{context}\n\n现在请基于上面的文章回答下面的问题,只告诉我答案,不要输出任何其他字词。\n\n问题:{input}\n回答:'), - ], )), + dict( + role='HUMAN', + prompt='阅读以下文字并用中文简短回答:\n\n{context}\n\n现在请基于上面的文章回答下面的问题,只告诉我答案,不要输出任何其他字词。\n\n问题:{input}\n回答:', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=64) + inferencer=dict(type=GenInferencer, max_out_len=64), ) LongBench_multifieldqa_zh_eval_cfg = dict( - evaluator=dict(type=LongBenchF1Evaluator, language='zh'), - pred_role='BOT' + evaluator=dict(type=LongBenchF1Evaluator, language='zh'), pred_role='BOT' ) LongBench_multifieldqa_zh_datasets = [ dict( type=LongBenchmultifieldqa_zhDataset, abbr='LongBench_multifieldqa_zh', - path='THUDM/LongBench', + path='opencompass/Longbench', name='multifieldqa_zh', reader_cfg=LongBench_multifieldqa_zh_reader_cfg, infer_cfg=LongBench_multifieldqa_zh_infer_cfg, - eval_cfg=LongBench_multifieldqa_zh_eval_cfg) + eval_cfg=LongBench_multifieldqa_zh_eval_cfg, + ) ] diff --git a/opencompass/configs/datasets/longbench/longbenchmusique/longbench_musique_gen_6b3efc.py b/opencompass/configs/datasets/longbench/longbenchmusique/longbench_musique_gen_6b3efc.py index 714775fd..77e30795 100644 --- a/opencompass/configs/datasets/longbench/longbenchmusique/longbench_musique_gen_6b3efc.py +++ b/opencompass/configs/datasets/longbench/longbenchmusique/longbench_musique_gen_6b3efc.py @@ -7,7 +7,7 @@ LongBench_musique_reader_cfg = dict( input_columns=['context', 'input'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_musique_infer_cfg = dict( @@ -15,24 +15,29 @@ LongBench_musique_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:'), - ], )), + dict( + role='HUMAN', + prompt='Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=32) + inferencer=dict(type=GenInferencer, max_out_len=32), ) LongBench_musique_eval_cfg = dict( - evaluator=dict(type=LongBenchF1Evaluator), - pred_role='BOT' + evaluator=dict(type=LongBenchF1Evaluator), pred_role='BOT' ) LongBench_musique_datasets = [ dict( type=LongBenchmusiqueDataset, abbr='LongBench_musique', - path='THUDM/LongBench', + path='opencompass/Longbench', name='musique', reader_cfg=LongBench_musique_reader_cfg, infer_cfg=LongBench_musique_infer_cfg, - eval_cfg=LongBench_musique_eval_cfg) + eval_cfg=LongBench_musique_eval_cfg, + ) ] diff --git a/opencompass/configs/datasets/longbench/longbenchnarrativeqa/longbench_narrativeqa_gen_a68305.py b/opencompass/configs/datasets/longbench/longbenchnarrativeqa/longbench_narrativeqa_gen_a68305.py index 8910d4f2..48dccb6b 100644 --- a/opencompass/configs/datasets/longbench/longbenchnarrativeqa/longbench_narrativeqa_gen_a68305.py +++ b/opencompass/configs/datasets/longbench/longbenchnarrativeqa/longbench_narrativeqa_gen_a68305.py @@ -7,7 +7,7 @@ LongBench_narrativeqa_reader_cfg = dict( input_columns=['context', 'input'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_narrativeqa_infer_cfg = dict( @@ -15,24 +15,29 @@ LongBench_narrativeqa_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='You are given a story, which can be either a novel or a movie script, and a question. Answer the question as concisely as you can, using a single phrase if possible. Do not provide any explanation.\n\nStory: {context}\n\nNow, answer the question based on the story as concisely as you can, using a single phrase if possible. Do not provide any explanation.\n\nQuestion: {input}\n\nAnswer:'), - ], )), + dict( + role='HUMAN', + prompt='You are given a story, which can be either a novel or a movie script, and a question. Answer the question as concisely as you can, using a single phrase if possible. Do not provide any explanation.\n\nStory: {context}\n\nNow, answer the question based on the story as concisely as you can, using a single phrase if possible. Do not provide any explanation.\n\nQuestion: {input}\n\nAnswer:', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=128) + inferencer=dict(type=GenInferencer, max_out_len=128), ) LongBench_narrativeqa_eval_cfg = dict( - evaluator=dict(type=LongBenchF1Evaluator), - pred_role='BOT' + evaluator=dict(type=LongBenchF1Evaluator), pred_role='BOT' ) LongBench_narrativeqa_datasets = [ dict( type=LongBenchnarrativeqaDataset, abbr='LongBench_narrativeqa', - path='THUDM/LongBench', + path='opencompass/Longbench', name='narrativeqa', reader_cfg=LongBench_narrativeqa_reader_cfg, infer_cfg=LongBench_narrativeqa_infer_cfg, - eval_cfg=LongBench_narrativeqa_eval_cfg) + eval_cfg=LongBench_narrativeqa_eval_cfg, + ) ] diff --git a/opencompass/configs/datasets/longbench/longbenchpassage_count/longbench_passage_count_gen_dcdaab.py b/opencompass/configs/datasets/longbench/longbenchpassage_count/longbench_passage_count_gen_dcdaab.py index 4b7e5575..1bdb42e7 100644 --- a/opencompass/configs/datasets/longbench/longbenchpassage_count/longbench_passage_count_gen_dcdaab.py +++ b/opencompass/configs/datasets/longbench/longbenchpassage_count/longbench_passage_count_gen_dcdaab.py @@ -7,7 +7,7 @@ LongBench_passage_count_reader_cfg = dict( input_columns=['context', 'input'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_passage_count_infer_cfg = dict( @@ -15,24 +15,29 @@ LongBench_passage_count_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='There are some paragraphs below sourced from Wikipedia. Some of them may be duplicates. Please carefully read these paragraphs and determine how many unique paragraphs there are after removing duplicates. In other words, how many non-repeating paragraphs are there in total?\n\n{context}\n\nPlease enter the final count of unique paragraphs after removing duplicates. The output format should only contain the number, such as 1, 2, 3, and so on.\n\nThe final answer is: '), - ], )), + dict( + role='HUMAN', + prompt='There are some paragraphs below sourced from Wikipedia. Some of them may be duplicates. Please carefully read these paragraphs and determine how many unique paragraphs there are after removing duplicates. In other words, how many non-repeating paragraphs are there in total?\n\n{context}\n\nPlease enter the final count of unique paragraphs after removing duplicates. The output format should only contain the number, such as 1, 2, 3, and so on.\n\nThe final answer is: ', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=32) + inferencer=dict(type=GenInferencer, max_out_len=32), ) LongBench_passage_count_eval_cfg = dict( - evaluator=dict(type=LongBenchCountEvaluator), - pred_role='BOT' + evaluator=dict(type=LongBenchCountEvaluator), pred_role='BOT' ) LongBench_passage_count_datasets = [ dict( type=LongBenchpassage_countDataset, abbr='LongBench_passage_count', - path='THUDM/LongBench', + path='opencompass/Longbench', name='passage_count', reader_cfg=LongBench_passage_count_reader_cfg, infer_cfg=LongBench_passage_count_infer_cfg, - eval_cfg=LongBench_passage_count_eval_cfg) + eval_cfg=LongBench_passage_count_eval_cfg, + ) ] diff --git a/opencompass/configs/datasets/longbench/longbenchpassage_retrieval_en/longbench_passage_retrieval_en_gen_734db5.py b/opencompass/configs/datasets/longbench/longbenchpassage_retrieval_en/longbench_passage_retrieval_en_gen_734db5.py index b24f8b8c..4b5bfe62 100644 --- a/opencompass/configs/datasets/longbench/longbenchpassage_retrieval_en/longbench_passage_retrieval_en_gen_734db5.py +++ b/opencompass/configs/datasets/longbench/longbenchpassage_retrieval_en/longbench_passage_retrieval_en_gen_734db5.py @@ -1,13 +1,16 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_inferencer import GenInferencer -from opencompass.datasets import LongBenchRetrievalEvaluator, LongBenchpassage_retrieval_enDataset +from opencompass.datasets import ( + LongBenchRetrievalEvaluator, + LongBenchpassage_retrieval_enDataset, +) LongBench_passage_retrieval_en_reader_cfg = dict( input_columns=['context', 'input'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_passage_retrieval_en_infer_cfg = dict( @@ -15,24 +18,29 @@ LongBench_passage_retrieval_en_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='Here are 30 paragraphs from Wikipedia, along with an abstract. Please determine which paragraph the abstract is from.\n\n{context}\n\nThe following is an abstract.\n\n{input}\n\nPlease enter the number of the paragraph that the abstract is from. The answer format must be like \"Paragraph 1\", \"Paragraph 2\", etc.\n\nThe answer is: '), - ], )), + dict( + role='HUMAN', + prompt='Here are 30 paragraphs from Wikipedia, along with an abstract. Please determine which paragraph the abstract is from.\n\n{context}\n\nThe following is an abstract.\n\n{input}\n\nPlease enter the number of the paragraph that the abstract is from. The answer format must be like "Paragraph 1", "Paragraph 2", etc.\n\nThe answer is: ', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=32) + inferencer=dict(type=GenInferencer, max_out_len=32), ) LongBench_passage_retrieval_en_eval_cfg = dict( - evaluator=dict(type=LongBenchRetrievalEvaluator), - pred_role='BOT' + evaluator=dict(type=LongBenchRetrievalEvaluator), pred_role='BOT' ) LongBench_passage_retrieval_en_datasets = [ dict( type=LongBenchpassage_retrieval_enDataset, abbr='LongBench_passage_retrieval_en', - path='THUDM/LongBench', + path='opencompass/Longbench', name='passage_retrieval_en', reader_cfg=LongBench_passage_retrieval_en_reader_cfg, infer_cfg=LongBench_passage_retrieval_en_infer_cfg, - eval_cfg=LongBench_passage_retrieval_en_eval_cfg) + eval_cfg=LongBench_passage_retrieval_en_eval_cfg, + ) ] diff --git a/opencompass/configs/datasets/longbench/longbenchpassage_retrieval_zh/longbench_passage_retrieval_zh_gen_01cca2.py b/opencompass/configs/datasets/longbench/longbenchpassage_retrieval_zh/longbench_passage_retrieval_zh_gen_01cca2.py index 13f80517..8855986d 100644 --- a/opencompass/configs/datasets/longbench/longbenchpassage_retrieval_zh/longbench_passage_retrieval_zh_gen_01cca2.py +++ b/opencompass/configs/datasets/longbench/longbenchpassage_retrieval_zh/longbench_passage_retrieval_zh_gen_01cca2.py @@ -1,13 +1,16 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_inferencer import GenInferencer -from opencompass.datasets import LongBenchRetrievalEvaluator, LongBenchpassage_retrieval_zhDataset +from opencompass.datasets import ( + LongBenchRetrievalEvaluator, + LongBenchpassage_retrieval_zhDataset, +) LongBench_passage_retrieval_zh_reader_cfg = dict( input_columns=['context', 'input'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_passage_retrieval_zh_infer_cfg = dict( @@ -15,24 +18,29 @@ LongBench_passage_retrieval_zh_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='以下是若干段落文字,以及其中一个段落的摘要。请确定给定的摘要出自哪一段。\n\n{context}\n\n下面是一个摘要\n\n{input}\n\n请输入摘要所属段落的编号。答案格式必须是\"段落1\",\"段落2\"等格式\n\n答案是:'), - ], )), + dict( + role='HUMAN', + prompt='以下是若干段落文字,以及其中一个段落的摘要。请确定给定的摘要出自哪一段。\n\n{context}\n\n下面是一个摘要\n\n{input}\n\n请输入摘要所属段落的编号。答案格式必须是"段落1","段落2"等格式\n\n答案是:', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=32) + inferencer=dict(type=GenInferencer, max_out_len=32), ) LongBench_passage_retrieval_zh_eval_cfg = dict( - evaluator=dict(type=LongBenchRetrievalEvaluator, language='zh'), - pred_role='BOT' + evaluator=dict(type=LongBenchRetrievalEvaluator, language='zh'), pred_role='BOT' ) LongBench_passage_retrieval_zh_datasets = [ dict( type=LongBenchpassage_retrieval_zhDataset, abbr='LongBench_passage_retrieval_zh', - path='THUDM/LongBench', + path='opencompass/Longbench', name='passage_retrieval_zh', reader_cfg=LongBench_passage_retrieval_zh_reader_cfg, infer_cfg=LongBench_passage_retrieval_zh_infer_cfg, - eval_cfg=LongBench_passage_retrieval_zh_eval_cfg) + eval_cfg=LongBench_passage_retrieval_zh_eval_cfg, + ) ] diff --git a/opencompass/configs/datasets/longbench/longbenchqasper/longbench_qasper_gen_6b3efc.py b/opencompass/configs/datasets/longbench/longbenchqasper/longbench_qasper_gen_6b3efc.py index ea330bc9..d0090960 100644 --- a/opencompass/configs/datasets/longbench/longbenchqasper/longbench_qasper_gen_6b3efc.py +++ b/opencompass/configs/datasets/longbench/longbenchqasper/longbench_qasper_gen_6b3efc.py @@ -7,7 +7,7 @@ LongBench_qasper_reader_cfg = dict( input_columns=['context', 'input'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_qasper_infer_cfg = dict( @@ -15,24 +15,29 @@ LongBench_qasper_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:'), - ], )), + dict( + role='HUMAN', + prompt='Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=32) + inferencer=dict(type=GenInferencer, max_out_len=32), ) LongBench_qasper_eval_cfg = dict( - evaluator=dict(type=LongBenchF1Evaluator), - pred_role='BOT' + evaluator=dict(type=LongBenchF1Evaluator), pred_role='BOT' ) LongBench_qasper_datasets = [ dict( type=LongBenchqasperDataset, abbr='LongBench_qasper', - path='THUDM/LongBench', + path='opencompass/Longbench', name='qasper', reader_cfg=LongBench_qasper_reader_cfg, infer_cfg=LongBench_qasper_infer_cfg, - eval_cfg=LongBench_qasper_eval_cfg) + eval_cfg=LongBench_qasper_eval_cfg, + ) ] diff --git a/opencompass/configs/datasets/longbench/longbenchqmsum/longbench_qmsum_gen_d33331.py b/opencompass/configs/datasets/longbench/longbenchqmsum/longbench_qmsum_gen_d33331.py index 396a998f..682b380f 100644 --- a/opencompass/configs/datasets/longbench/longbenchqmsum/longbench_qmsum_gen_d33331.py +++ b/opencompass/configs/datasets/longbench/longbenchqmsum/longbench_qmsum_gen_d33331.py @@ -7,7 +7,7 @@ LongBench_qmsum_reader_cfg = dict( input_columns=['context', 'input'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_qmsum_infer_cfg = dict( @@ -15,24 +15,29 @@ LongBench_qmsum_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='You are given a meeting transcript and a query containing a question or instruction. Answer the query in one or more sentences.\n\nTranscript:\n{context}\n\nNow, answer the query based on the above meeting transcript in one or more sentences.\n\nQuery: {input}\nAnswer:'), - ], )), + dict( + role='HUMAN', + prompt='You are given a meeting transcript and a query containing a question or instruction. Answer the query in one or more sentences.\n\nTranscript:\n{context}\n\nNow, answer the query based on the above meeting transcript in one or more sentences.\n\nQuery: {input}\nAnswer:', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=512) + inferencer=dict(type=GenInferencer, max_out_len=512), ) LongBench_qmsum_eval_cfg = dict( - evaluator=dict(type=LongBenchRougeEvaluator), - pred_role='BOT' + evaluator=dict(type=LongBenchRougeEvaluator), pred_role='BOT' ) LongBench_qmsum_datasets = [ dict( type=LongBenchqmsumDataset, abbr='LongBench_qmsum', - path='THUDM/LongBench', + path='opencompass/Longbench', name='qmsum', reader_cfg=LongBench_qmsum_reader_cfg, infer_cfg=LongBench_qmsum_infer_cfg, - eval_cfg=LongBench_qmsum_eval_cfg) + eval_cfg=LongBench_qmsum_eval_cfg, + ) ] diff --git a/opencompass/configs/datasets/longbench/longbenchrepobench/longbench_repobench_gen_6df953.py b/opencompass/configs/datasets/longbench/longbenchrepobench/longbench_repobench_gen_6df953.py index e04b3d34..84f0fff8 100644 --- a/opencompass/configs/datasets/longbench/longbenchrepobench/longbench_repobench_gen_6df953.py +++ b/opencompass/configs/datasets/longbench/longbenchrepobench/longbench_repobench_gen_6df953.py @@ -7,7 +7,7 @@ LongBench_repobench_reader_cfg = dict( input_columns=['context', 'input'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_repobench_infer_cfg = dict( @@ -15,24 +15,29 @@ LongBench_repobench_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='Please complete the code given below. \n{context}{input}Next line of code:\n'), - ], )), + dict( + role='HUMAN', + prompt='Please complete the code given below. \n{context}{input}Next line of code:\n', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=64) + inferencer=dict(type=GenInferencer, max_out_len=64), ) LongBench_repobench_eval_cfg = dict( - evaluator=dict(type=LongBenchCodeSimEvaluator), - pred_role='BOT' + evaluator=dict(type=LongBenchCodeSimEvaluator), pred_role='BOT' ) LongBench_repobench_datasets = [ dict( type=LongBenchrepobenchDataset, abbr='LongBench_repobench-p', - path='THUDM/LongBench', + path='opencompass/Longbench', name='repobench-p', reader_cfg=LongBench_repobench_reader_cfg, infer_cfg=LongBench_repobench_infer_cfg, - eval_cfg=LongBench_repobench_eval_cfg) + eval_cfg=LongBench_repobench_eval_cfg, + ) ] diff --git a/opencompass/configs/datasets/longbench/longbenchsamsum/longbench_samsum_gen_f4416d.py b/opencompass/configs/datasets/longbench/longbenchsamsum/longbench_samsum_gen_f4416d.py index 51d2f74a..37f617b7 100644 --- a/opencompass/configs/datasets/longbench/longbenchsamsum/longbench_samsum_gen_f4416d.py +++ b/opencompass/configs/datasets/longbench/longbenchsamsum/longbench_samsum_gen_f4416d.py @@ -1,13 +1,17 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_inferencer import GenInferencer -from opencompass.datasets import LongBenchRougeEvaluator, LongBenchsamsumDataset, samsum_postprocess +from opencompass.datasets import ( + LongBenchRougeEvaluator, + LongBenchsamsumDataset, + samsum_postprocess, +) LongBench_samsum_reader_cfg = dict( input_columns=['context', 'input'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_samsum_infer_cfg = dict( @@ -15,10 +19,15 @@ LongBench_samsum_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='Summarize the dialogue into a few short sentences. The following are some examples.\n\n{context}\n\n{input}'), - ], )), + dict( + role='HUMAN', + prompt='Summarize the dialogue into a few short sentences. The following are some examples.\n\n{context}\n\n{input}', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=128) + inferencer=dict(type=GenInferencer, max_out_len=128), ) LongBench_samsum_eval_cfg = dict( @@ -31,9 +40,10 @@ LongBench_samsum_datasets = [ dict( type=LongBenchsamsumDataset, abbr='LongBench_samsum', - path='THUDM/LongBench', + path='opencompass/Longbench', name='samsum', reader_cfg=LongBench_samsum_reader_cfg, infer_cfg=LongBench_samsum_infer_cfg, - eval_cfg=LongBench_samsum_eval_cfg) + eval_cfg=LongBench_samsum_eval_cfg, + ) ] diff --git a/opencompass/configs/datasets/longbench/longbenchtrec/longbench_trec_gen_824187.py b/opencompass/configs/datasets/longbench/longbenchtrec/longbench_trec_gen_824187.py index 66719fb9..ca44b384 100644 --- a/opencompass/configs/datasets/longbench/longbenchtrec/longbench_trec_gen_824187.py +++ b/opencompass/configs/datasets/longbench/longbenchtrec/longbench_trec_gen_824187.py @@ -1,13 +1,17 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_inferencer import GenInferencer -from opencompass.datasets import LongBenchClassificationEvaluator, LongBenchtrecDataset, trec_postprocess +from opencompass.datasets import ( + LongBenchClassificationEvaluator, + LongBenchtrecDataset, + trec_postprocess, +) LongBench_trec_reader_cfg = dict( input_columns=['context', 'input'], output_column='all_labels', train_split='test', - test_split='test' + test_split='test', ) LongBench_trec_infer_cfg = dict( @@ -15,10 +19,15 @@ LongBench_trec_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='Please determine the type of the question below. Here are some examples of questions.\n\n{context}\n{input}'), - ], )), + dict( + role='HUMAN', + prompt='Please determine the type of the question below. Here are some examples of questions.\n\n{context}\n{input}', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=64) + inferencer=dict(type=GenInferencer, max_out_len=64), ) LongBench_trec_eval_cfg = dict( @@ -31,9 +40,10 @@ LongBench_trec_datasets = [ dict( type=LongBenchtrecDataset, abbr='LongBench_trec', - path='THUDM/LongBench', + path='opencompass/Longbench', name='trec', reader_cfg=LongBench_trec_reader_cfg, infer_cfg=LongBench_trec_infer_cfg, - eval_cfg=LongBench_trec_eval_cfg) + eval_cfg=LongBench_trec_eval_cfg, + ) ] diff --git a/opencompass/configs/datasets/longbench/longbenchtriviaqa/longbench_triviaqa_gen_d30cb9.py b/opencompass/configs/datasets/longbench/longbenchtriviaqa/longbench_triviaqa_gen_d30cb9.py index 2cfb7fc1..73b1a364 100644 --- a/opencompass/configs/datasets/longbench/longbenchtriviaqa/longbench_triviaqa_gen_d30cb9.py +++ b/opencompass/configs/datasets/longbench/longbenchtriviaqa/longbench_triviaqa_gen_d30cb9.py @@ -1,13 +1,17 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_inferencer import GenInferencer -from opencompass.datasets import LongBenchF1Evaluator, LongBenchtriviaqaDataset, triviaqa_postprocess +from opencompass.datasets import ( + LongBenchF1Evaluator, + LongBenchtriviaqaDataset, + triviaqa_postprocess, +) LongBench_triviaqa_reader_cfg = dict( input_columns=['context', 'input'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_triviaqa_infer_cfg = dict( @@ -15,10 +19,15 @@ LongBench_triviaqa_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='Answer the question based on the given passage. Only give me the answer and do not output any other words. The following are some examples.\n\n{context}\n\n{input}'), - ], )), + dict( + role='HUMAN', + prompt='Answer the question based on the given passage. Only give me the answer and do not output any other words. The following are some examples.\n\n{context}\n\n{input}', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=32) + inferencer=dict(type=GenInferencer, max_out_len=32), ) LongBench_triviaqa_eval_cfg = dict( @@ -31,9 +40,10 @@ LongBench_triviaqa_datasets = [ dict( type=LongBenchtriviaqaDataset, abbr='LongBench_triviaqa', - path='THUDM/LongBench', + path='opencompass/Longbench', name='triviaqa', reader_cfg=LongBench_triviaqa_reader_cfg, infer_cfg=LongBench_triviaqa_infer_cfg, - eval_cfg=LongBench_triviaqa_eval_cfg) + eval_cfg=LongBench_triviaqa_eval_cfg, + ) ] diff --git a/opencompass/configs/datasets/longbench/longbenchvcsum/longbench_vcsum_gen_f7a8ac.py b/opencompass/configs/datasets/longbench/longbenchvcsum/longbench_vcsum_gen_f7a8ac.py index 1264940c..8b10b917 100644 --- a/opencompass/configs/datasets/longbench/longbenchvcsum/longbench_vcsum_gen_f7a8ac.py +++ b/opencompass/configs/datasets/longbench/longbenchvcsum/longbench_vcsum_gen_f7a8ac.py @@ -7,7 +7,7 @@ LongBench_vcsum_reader_cfg = dict( input_columns=['context'], output_column='answers', train_split='test', - test_split='test' + test_split='test', ) LongBench_vcsum_infer_cfg = dict( @@ -15,24 +15,29 @@ LongBench_vcsum_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role='HUMAN', prompt='下面有一段会议记录,请你阅读后,写一段总结,总结会议的内容。\n会议记录:\n{context}\n\n会议总结:'), - ], )), + dict( + role='HUMAN', + prompt='下面有一段会议记录,请你阅读后,写一段总结,总结会议的内容。\n会议记录:\n{context}\n\n会议总结:', + ), + ], + ), + ), retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=512) + inferencer=dict(type=GenInferencer, max_out_len=512), ) LongBench_vcsum_eval_cfg = dict( - evaluator=dict(type=LongBenchRougeEvaluator, language='zh'), - pred_role='BOT' + evaluator=dict(type=LongBenchRougeEvaluator, language='zh'), pred_role='BOT' ) LongBench_vcsum_datasets = [ dict( type=LongBenchvcsumDataset, abbr='LongBench_vcsum', - path='THUDM/LongBench', + path='opencompass/Longbench', name='vcsum', reader_cfg=LongBench_vcsum_reader_cfg, infer_cfg=LongBench_vcsum_infer_cfg, - eval_cfg=LongBench_vcsum_eval_cfg) + eval_cfg=LongBench_vcsum_eval_cfg, + ) ] diff --git a/opencompass/datasets/longbench/longbench_2wikim_qa.py b/opencompass/datasets/longbench/longbench_2wikim_qa.py index d435c41e..f3bd033c 100644 --- a/opencompass/datasets/longbench/longbench_2wikim_qa.py +++ b/opencompass/datasets/longbench/longbench_2wikim_qa.py @@ -10,11 +10,12 @@ from ..base import BaseDataset class LongBench2wikimqaDataset(BaseDataset): @staticmethod - def load(**kwargs): - if 'data_files' in kwargs: - kwargs['data_files'] = get_data_path(kwargs['data_files'], - local_mode=True) - dataset = load_dataset(**kwargs) + def load(path: str, name: str): + path = get_data_path(path) + dataset = load_dataset(path=path, + name=name, + data_dir=path, + trust_remote_code=True) split = 'test' raw_data = [] for i in range(len(dataset[split])): diff --git a/opencompass/datasets/longbench/longbench_dureader.py b/opencompass/datasets/longbench/longbench_dureader.py index 28d013ce..79ec7bfa 100644 --- a/opencompass/datasets/longbench/longbench_dureader.py +++ b/opencompass/datasets/longbench/longbench_dureader.py @@ -10,11 +10,12 @@ from ..base import BaseDataset class LongBenchdureaderDataset(BaseDataset): @staticmethod - def load(**kwargs): - if 'data_files' in kwargs: - kwargs['data_files'] = get_data_path(kwargs['data_files'], - local_mode=True) - dataset = load_dataset(**kwargs) + def load(path: str, name: str): + path = get_data_path(path) + dataset = load_dataset(path=path, + name=name, + data_dir=path, + trust_remote_code=True) split = 'test' raw_data = [] for i in range(len(dataset[split])): diff --git a/opencompass/datasets/longbench/longbench_gov_report.py b/opencompass/datasets/longbench/longbench_gov_report.py index 41a3cf73..ad9edf00 100644 --- a/opencompass/datasets/longbench/longbench_gov_report.py +++ b/opencompass/datasets/longbench/longbench_gov_report.py @@ -10,11 +10,12 @@ from ..base import BaseDataset class LongBenchgov_reportDataset(BaseDataset): @staticmethod - def load(**kwargs): - if 'data_files' in kwargs: - kwargs['data_files'] = get_data_path(kwargs['data_files'], - local_mode=True) - dataset = load_dataset(**kwargs) + def load(path: str, name: str): + path = get_data_path(path) + dataset = load_dataset(path=path, + name=name, + data_dir=path, + trust_remote_code=True) split = 'test' raw_data = [] for i in range(len(dataset[split])): diff --git a/opencompass/datasets/longbench/longbench_hotpot_qa.py b/opencompass/datasets/longbench/longbench_hotpot_qa.py index 45734df6..e687c00d 100644 --- a/opencompass/datasets/longbench/longbench_hotpot_qa.py +++ b/opencompass/datasets/longbench/longbench_hotpot_qa.py @@ -10,11 +10,12 @@ from ..base import BaseDataset class LongBenchhotpotqaDataset(BaseDataset): @staticmethod - def load(**kwargs): - if 'data_files' in kwargs: - kwargs['data_files'] = get_data_path(kwargs['data_files'], - local_mode=True) - dataset = load_dataset(**kwargs) + def load(path: str, name: str): + path = get_data_path(path) + dataset = load_dataset(path=path, + name=name, + data_dir=path, + trust_remote_code=True) split = 'test' raw_data = [] for i in range(len(dataset[split])): diff --git a/opencompass/datasets/longbench/longbench_lcc.py b/opencompass/datasets/longbench/longbench_lcc.py index f7f94b6f..0a5c15f7 100644 --- a/opencompass/datasets/longbench/longbench_lcc.py +++ b/opencompass/datasets/longbench/longbench_lcc.py @@ -10,11 +10,12 @@ from ..base import BaseDataset class LongBenchlccDataset(BaseDataset): @staticmethod - def load(**kwargs): - if 'data_files' in kwargs: - kwargs['data_files'] = get_data_path(kwargs['data_files'], - local_mode=True) - dataset = load_dataset(**kwargs) + def load(path: str, name: str): + path = get_data_path(path) + dataset = load_dataset(path=path, + name=name, + data_dir=path, + trust_remote_code=True) split = 'test' raw_data = [] for i in range(len(dataset[split])): diff --git a/opencompass/datasets/longbench/longbench_lsht.py b/opencompass/datasets/longbench/longbench_lsht.py index 7916b046..52f64a8b 100644 --- a/opencompass/datasets/longbench/longbench_lsht.py +++ b/opencompass/datasets/longbench/longbench_lsht.py @@ -10,11 +10,12 @@ from ..base import BaseDataset class LongBenchlshtDataset(BaseDataset): @staticmethod - def load(**kwargs): - if 'data_files' in kwargs: - kwargs['data_files'] = get_data_path(kwargs['data_files'], - local_mode=True) - dataset = load_dataset(**kwargs) + def load(path: str, name: str): + path = get_data_path(path) + dataset = load_dataset(path=path, + name=name, + data_dir=path, + trust_remote_code=True) split = 'test' raw_data = [] for i in range(len(dataset[split])): diff --git a/opencompass/datasets/longbench/longbench_multi_news.py b/opencompass/datasets/longbench/longbench_multi_news.py index fe746f37..f6caa3d2 100644 --- a/opencompass/datasets/longbench/longbench_multi_news.py +++ b/opencompass/datasets/longbench/longbench_multi_news.py @@ -10,11 +10,12 @@ from ..base import BaseDataset class LongBenchmulti_newsDataset(BaseDataset): @staticmethod - def load(**kwargs): - if 'data_files' in kwargs: - kwargs['data_files'] = get_data_path(kwargs['data_files'], - local_mode=True) - dataset = load_dataset(**kwargs) + def load(path: str, name: str): + path = get_data_path(path) + dataset = load_dataset(path=path, + name=name, + data_dir=path, + trust_remote_code=True) split = 'test' raw_data = [] for i in range(len(dataset[split])): diff --git a/opencompass/datasets/longbench/longbench_multifieldqa_en.py b/opencompass/datasets/longbench/longbench_multifieldqa_en.py index 9272d5c2..adbe9041 100644 --- a/opencompass/datasets/longbench/longbench_multifieldqa_en.py +++ b/opencompass/datasets/longbench/longbench_multifieldqa_en.py @@ -10,11 +10,12 @@ from ..base import BaseDataset class LongBenchmultifieldqa_enDataset(BaseDataset): @staticmethod - def load(**kwargs): - if 'data_files' in kwargs: - kwargs['data_files'] = get_data_path(kwargs['data_files'], - local_mode=True) - dataset = load_dataset(**kwargs) + def load(path: str, name: str): + path = get_data_path(path) + dataset = load_dataset(path=path, + name=name, + data_dir=path, + trust_remote_code=True) split = 'test' raw_data = [] for i in range(len(dataset[split])): diff --git a/opencompass/datasets/longbench/longbench_multifieldqa_zh.py b/opencompass/datasets/longbench/longbench_multifieldqa_zh.py index f1b9f6e6..caa20860 100644 --- a/opencompass/datasets/longbench/longbench_multifieldqa_zh.py +++ b/opencompass/datasets/longbench/longbench_multifieldqa_zh.py @@ -10,11 +10,12 @@ from ..base import BaseDataset class LongBenchmultifieldqa_zhDataset(BaseDataset): @staticmethod - def load(**kwargs): - if 'data_files' in kwargs: - kwargs['data_files'] = get_data_path(kwargs['data_files'], - local_mode=True) - dataset = load_dataset(**kwargs) + def load(path: str, name: str): + path = get_data_path(path) + dataset = load_dataset(path=path, + name=name, + data_dir=path, + trust_remote_code=True) split = 'test' raw_data = [] for i in range(len(dataset[split])): diff --git a/opencompass/datasets/longbench/longbench_musique.py b/opencompass/datasets/longbench/longbench_musique.py index 836777c1..7c67a6e8 100644 --- a/opencompass/datasets/longbench/longbench_musique.py +++ b/opencompass/datasets/longbench/longbench_musique.py @@ -10,11 +10,12 @@ from ..base import BaseDataset class LongBenchmusiqueDataset(BaseDataset): @staticmethod - def load(**kwargs): - if 'data_files' in kwargs: - kwargs['data_files'] = get_data_path(kwargs['data_files'], - local_mode=True) - dataset = load_dataset(**kwargs) + def load(path: str, name: str): + path = get_data_path(path) + dataset = load_dataset(path=path, + name=name, + data_dir=path, + trust_remote_code=True) split = 'test' raw_data = [] for i in range(len(dataset[split])): diff --git a/opencompass/datasets/longbench/longbench_narrative_qa.py b/opencompass/datasets/longbench/longbench_narrative_qa.py index 54378da2..a254c79c 100644 --- a/opencompass/datasets/longbench/longbench_narrative_qa.py +++ b/opencompass/datasets/longbench/longbench_narrative_qa.py @@ -10,11 +10,12 @@ from ..base import BaseDataset class LongBenchnarrativeqaDataset(BaseDataset): @staticmethod - def load(**kwargs): - if 'data_files' in kwargs: - kwargs['data_files'] = get_data_path(kwargs['data_files'], - local_mode=True) - dataset = load_dataset(**kwargs) + def load(path: str, name: str): + path = get_data_path(path) + dataset = load_dataset(path=path, + name=name, + data_dir=path, + trust_remote_code=True) split = 'test' raw_data = [] for i in range(len(dataset[split])): diff --git a/opencompass/datasets/longbench/longbench_passage_count.py b/opencompass/datasets/longbench/longbench_passage_count.py index d72ea97a..b1f64da1 100644 --- a/opencompass/datasets/longbench/longbench_passage_count.py +++ b/opencompass/datasets/longbench/longbench_passage_count.py @@ -10,11 +10,12 @@ from ..base import BaseDataset class LongBenchpassage_countDataset(BaseDataset): @staticmethod - def load(**kwargs): - if 'data_files' in kwargs: - kwargs['data_files'] = get_data_path(kwargs['data_files'], - local_mode=True) - dataset = load_dataset(**kwargs) + def load(path: str, name: str): + path = get_data_path(path) + dataset = load_dataset(path=path, + name=name, + data_dir=path, + trust_remote_code=True) split = 'test' raw_data = [] for i in range(len(dataset[split])): diff --git a/opencompass/datasets/longbench/longbench_passage_retrieval_en.py b/opencompass/datasets/longbench/longbench_passage_retrieval_en.py index 1dc0612f..feb205e4 100644 --- a/opencompass/datasets/longbench/longbench_passage_retrieval_en.py +++ b/opencompass/datasets/longbench/longbench_passage_retrieval_en.py @@ -10,11 +10,12 @@ from ..base import BaseDataset class LongBenchpassage_retrieval_enDataset(BaseDataset): @staticmethod - def load(**kwargs): - if 'data_files' in kwargs: - kwargs['data_files'] = get_data_path(kwargs['data_files'], - local_mode=True) - dataset = load_dataset(**kwargs) + def load(path: str, name: str): + path = get_data_path(path) + dataset = load_dataset(path=path, + name=name, + data_dir=path, + trust_remote_code=True) split = 'test' raw_data = [] for i in range(len(dataset[split])): diff --git a/opencompass/datasets/longbench/longbench_passage_retrieval_zh.py b/opencompass/datasets/longbench/longbench_passage_retrieval_zh.py index 50b3ba0d..22058c0c 100644 --- a/opencompass/datasets/longbench/longbench_passage_retrieval_zh.py +++ b/opencompass/datasets/longbench/longbench_passage_retrieval_zh.py @@ -10,11 +10,12 @@ from ..base import BaseDataset class LongBenchpassage_retrieval_zhDataset(BaseDataset): @staticmethod - def load(**kwargs): - if 'data_files' in kwargs: - kwargs['data_files'] = get_data_path(kwargs['data_files'], - local_mode=True) - dataset = load_dataset(**kwargs) + def load(path: str, name: str): + path = get_data_path(path) + dataset = load_dataset(path=path, + name=name, + data_dir=path, + trust_remote_code=True) split = 'test' raw_data = [] for i in range(len(dataset[split])): diff --git a/opencompass/datasets/longbench/longbench_qasper.py b/opencompass/datasets/longbench/longbench_qasper.py index e1518daa..e8b5e6bb 100644 --- a/opencompass/datasets/longbench/longbench_qasper.py +++ b/opencompass/datasets/longbench/longbench_qasper.py @@ -10,11 +10,12 @@ from ..base import BaseDataset class LongBenchqasperDataset(BaseDataset): @staticmethod - def load(**kwargs): - if 'data_files' in kwargs: - kwargs['data_files'] = get_data_path(kwargs['data_files'], - local_mode=True) - dataset = load_dataset(**kwargs) + def load(path: str, name: str): + path = get_data_path(path) + dataset = load_dataset(path=path, + name=name, + data_dir=path, + trust_remote_code=True) split = 'test' raw_data = [] for i in range(len(dataset[split])): diff --git a/opencompass/datasets/longbench/longbench_qmsum.py b/opencompass/datasets/longbench/longbench_qmsum.py index 06fd4310..a3ff5a16 100644 --- a/opencompass/datasets/longbench/longbench_qmsum.py +++ b/opencompass/datasets/longbench/longbench_qmsum.py @@ -10,11 +10,12 @@ from ..base import BaseDataset class LongBenchqmsumDataset(BaseDataset): @staticmethod - def load(**kwargs): - if 'data_files' in kwargs: - kwargs['data_files'] = get_data_path(kwargs['data_files'], - local_mode=True) - dataset = load_dataset(**kwargs) + def load(path: str, name: str): + path = get_data_path(path) + dataset = load_dataset(path=path, + name=name, + data_dir=path, + trust_remote_code=True) split = 'test' raw_data = [] for i in range(len(dataset[split])): diff --git a/opencompass/datasets/longbench/longbench_repobench.py b/opencompass/datasets/longbench/longbench_repobench.py index 1fed2331..8213d2b2 100644 --- a/opencompass/datasets/longbench/longbench_repobench.py +++ b/opencompass/datasets/longbench/longbench_repobench.py @@ -10,11 +10,12 @@ from ..base import BaseDataset class LongBenchrepobenchDataset(BaseDataset): @staticmethod - def load(**kwargs): - if 'data_files' in kwargs: - kwargs['data_files'] = get_data_path(kwargs['data_files'], - local_mode=True) - dataset = load_dataset(**kwargs) + def load(path: str, name: str): + path = get_data_path(path) + dataset = load_dataset(path=path, + name=name, + data_dir=path, + trust_remote_code=True) split = 'test' raw_data = [] for i in range(len(dataset[split])): diff --git a/opencompass/datasets/longbench/longbench_samsum.py b/opencompass/datasets/longbench/longbench_samsum.py index 2cd5b808..60239c82 100644 --- a/opencompass/datasets/longbench/longbench_samsum.py +++ b/opencompass/datasets/longbench/longbench_samsum.py @@ -10,11 +10,12 @@ from ..base import BaseDataset class LongBenchsamsumDataset(BaseDataset): @staticmethod - def load(**kwargs): - if 'data_files' in kwargs: - kwargs['data_files'] = get_data_path(kwargs['data_files'], - local_mode=True) - dataset = load_dataset(**kwargs) + def load(path: str, name: str): + path = get_data_path(path) + dataset = load_dataset(path=path, + name=name, + data_dir=path, + trust_remote_code=True) split = 'test' raw_data = [] for i in range(len(dataset[split])): diff --git a/opencompass/datasets/longbench/longbench_trec.py b/opencompass/datasets/longbench/longbench_trec.py index b0b0fffc..76e24d17 100644 --- a/opencompass/datasets/longbench/longbench_trec.py +++ b/opencompass/datasets/longbench/longbench_trec.py @@ -10,11 +10,12 @@ from ..base import BaseDataset class LongBenchtrecDataset(BaseDataset): @staticmethod - def load(**kwargs): - if 'data_files' in kwargs: - kwargs['data_files'] = get_data_path(kwargs['data_files'], - local_mode=True) - dataset = load_dataset(**kwargs) + def load(path: str, name: str): + path = get_data_path(path) + dataset = load_dataset(path=path, + name=name, + data_dir=path, + trust_remote_code=True) split = 'test' raw_data = [] for i in range(len(dataset[split])): diff --git a/opencompass/datasets/longbench/longbench_trivia_qa.py b/opencompass/datasets/longbench/longbench_trivia_qa.py index 19a84a03..b28f5029 100644 --- a/opencompass/datasets/longbench/longbench_trivia_qa.py +++ b/opencompass/datasets/longbench/longbench_trivia_qa.py @@ -10,11 +10,12 @@ from ..base import BaseDataset class LongBenchtriviaqaDataset(BaseDataset): @staticmethod - def load(**kwargs): - if 'data_files' in kwargs: - kwargs['data_files'] = get_data_path(kwargs['data_files'], - local_mode=True) - dataset = load_dataset(**kwargs) + def load(path: str, name: str): + path = get_data_path(path) + dataset = load_dataset(path=path, + name=name, + data_dir=path, + trust_remote_code=True) split = 'test' raw_data = [] for i in range(len(dataset[split])): diff --git a/opencompass/datasets/longbench/longbench_vcsum.py b/opencompass/datasets/longbench/longbench_vcsum.py index 28526aaa..5850b202 100644 --- a/opencompass/datasets/longbench/longbench_vcsum.py +++ b/opencompass/datasets/longbench/longbench_vcsum.py @@ -10,11 +10,12 @@ from ..base import BaseDataset class LongBenchvcsumDataset(BaseDataset): @staticmethod - def load(**kwargs): - if 'data_files' in kwargs: - kwargs['data_files'] = get_data_path(kwargs['data_files'], - local_mode=True) - dataset = load_dataset(**kwargs) + def load(path: str, name: str): + path = get_data_path(path) + dataset = load_dataset(path=path, + name=name, + data_dir=path, + trust_remote_code=True) split = 'test' raw_data = [] for i in range(len(dataset[split])): diff --git a/opencompass/utils/datasets_info.py b/opencompass/utils/datasets_info.py index 8ee208ea..0c20acb5 100644 --- a/opencompass/utils/datasets_info.py +++ b/opencompass/utils/datasets_info.py @@ -265,6 +265,12 @@ DATASETS_MAPPING = { "hf_id": "opencompass/xsum", "local": "./data/Xsum/dev.jsonl", }, + # Longbench + "opencompass/Longbench": { + "ms_id": "", + "hf_id": "THUDM/LongBench", + "local": "./data/Longbench", + }, # Needlebench "opencompass/needlebench": { "ms_id": "", @@ -402,6 +408,10 @@ DATASETS_URL = { "url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/mmlu_pro.zip", "md5": "e3200c7380f4cea5f13c768f2815fabb", }, + "/Longbench": { + "url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/Longbench.zip", + "md5": "ab0cb9e520ae5cfb899bf38b564249bb", + }, "/needlebench": { "url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/needlebench.zip", "md5": "b546da0397746eaff4d3ff0f20d6ede2",