[Feature] Add LEval datasets

Co-authored-by: kennymckormick <dhd@pku.edu.cn>
Tong Gao 2023-08-11 17:38:31 +08:00 committed by GitHub
parent 8d9cee060f
commit bf79ff1c6d
76 changed files with 1471 additions and 12 deletions

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .LEval_coursera_gen_5c84a9 import LEval_coursera_datasets # noqa: F401, F403

View File

@ -0,0 +1,42 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator, AccEvaluator
from opencompass.datasets import LEvalCourseraDataset
from opencompass.utils.text_postprocessors import first_capital_postprocess, first_capital_postprocess_multi
LEval_coursera_reader_cfg = dict(
input_columns=['context', 'question'],
output_column='answer',
train_split='test',
test_split='test'
)
LEval_coursera_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(role='HUMAN', prompt='{context}\n{question}\nAnswer:'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=10)
)
LEval_coursera_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type=first_capital_postprocess_multi),
pred_role='BOT'
)
LEval_coursera_datasets = [
dict(
type=LEvalCourseraDataset,
abbr='LEval_coursera',
path='L4NLP/LEval',
name='coursera',
reader_cfg=LEval_coursera_reader_cfg,
infer_cfg=LEval_coursera_infer_cfg,
eval_cfg=LEval_coursera_eval_cfg)
]
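
For reference, this template renders each coursera row as one HUMAN turn followed by an empty BOT turn; a minimal sketch of the resulting prompt string (placeholder field values, assuming plain {field} substitution by the prompt template):

row = dict(context='<lecture transcript>', question='<multiple-choice question>')
prompt = '{context}\n{question}\nAnswer:'.format(**row)
# -> '<lecture transcript>\n<multiple-choice question>\nAnswer:'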

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .LEval_financialqa_gen_9f5404 import LEval_financialqa_datasets # noqa: F401, F403

View File

@ -0,0 +1,40 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator
from opencompass.datasets import LEvalFinancialQADataset
LEval_financialqa_reader_cfg = dict(
input_columns=['context', 'question'],
output_column='answer',
train_split='test',
test_split='test'
)
LEval_financialqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(role='HUMAN', prompt='{context}\n{question}\nAnswer:'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512)
)
LEval_financialqa_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
pred_role='BOT'
)
LEval_financialqa_datasets = [
dict(
type=LEvalFinancialQADataset,
abbr='LEval_financialqa',
path='L4NLP/LEval',
name='financial_qa',
reader_cfg=LEval_financialqa_reader_cfg,
infer_cfg=LEval_financialqa_infer_cfg,
eval_cfg=LEval_financialqa_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .LEval_gsm100_gen_a4d1f8 import LEval_gsm100_datasets

View File

@ -0,0 +1,43 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator, AccEvaluator
from opencompass.datasets import LEvalGSM100Dataset
from opencompass.utils.text_postprocessors import first_capital_postprocess, first_capital_postprocess_multi
from opencompass.registry import TEXT_POSTPROCESSORS
from opencompass.datasets import gsm100_dataset_postprocess, gsm100_postprocess
LEval_gsm100_reader_cfg = dict(
input_columns=['context', 'question'],
output_column='answer',
train_split='test',
test_split='test'
)
LEval_gsm100_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(role='HUMAN', prompt='{question}\n'),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512)
)
LEval_gsm100_eval_cfg = dict(evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type=gsm100_postprocess),
dataset_postprocessor=dict(type=gsm100_dataset_postprocess)
)
LEval_gsm100_datasets = [
dict(
type=LEvalGSM100Dataset,
abbr='LEval_gsm100',
path='L4NLP/LEval',
name='gsm100',
reader_cfg=LEval_gsm100_reader_cfg,
infer_cfg=LEval_gsm100_infer_cfg,
eval_cfg=LEval_gsm100_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .LEval_gov_report_summ_gen_c68a56 import LEval_govreport_summ_datasets # noqa: F401, F403

View File

@ -0,0 +1,40 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator
from opencompass.datasets import LEvalGovReportSummDataset
LEval_govreport_summ_reader_cfg = dict(
input_columns=['context', 'question'],
output_column='answer',
train_split='test',
test_split='test'
)
LEval_govreport_summ_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(role='HUMAN', prompt='Government report: {context}\n{question}\nTL;DR:'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512)
)
LEval_govreport_summ_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
pred_role='BOT'
)
LEval_govreport_summ_datasets = [
dict(
type=LEvalGovReportSummDataset,
abbr='LEval_gov_report_summ',
path='L4NLP/LEval',
name='gov_report_summ',
reader_cfg=LEval_govreport_summ_reader_cfg,
infer_cfg=LEval_govreport_summ_infer_cfg,
eval_cfg=LEval_govreport_summ_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .LEval_legalcontractqa_gen_f0bb20 import LEval_legalqa_datasets # noqa: F401, F403

View File

@ -0,0 +1,40 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator
from opencompass.datasets import LEvalLegalContractQADataset
LEval_legalqa_reader_cfg = dict(
input_columns=['context', 'question'],
output_column='answer',
train_split='test',
test_split='test'
)
LEval_legalqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(role='HUMAN', prompt='{context}\nQuestion: {question}\nAnswer:'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=128)
)
LEval_legalqa_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
pred_role='BOT'
)
LEval_legalqa_datasets = [
dict(
type=LEvalLegalContractQADataset,
abbr='LEval_legal_contract_qa',
path='L4NLP/LEval',
name='legal_contract_qa',
reader_cfg=LEval_legalqa_reader_cfg,
infer_cfg=LEval_legalqa_infer_cfg,
eval_cfg=LEval_legalqa_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .LEval_meetingsumm_gen_6c03d0 import LEval_meetingsumm_datasets # noqa: F401, F403

View File

@ -0,0 +1,40 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator
from opencompass.datasets import LEvalMeetingSummDataset
LEval_meetingsumm_reader_cfg = dict(
input_columns=['context', 'question'],
output_column='answer',
train_split='test',
test_split='test'
)
LEval_meetingsumm_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(role='HUMAN', prompt='{context}\nQuestion: {question}\nAnswer:'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512)
)
LEval_meetingsumm_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
pred_role='BOT'
)
LEval_meetingsumm_datasets = [
dict(
type=LEvalMeetingSummDataset,
abbr='LEval_meeting_summ',
path='L4NLP/LEval',
name='meeting_summ',
reader_cfg=LEval_meetingsumm_reader_cfg,
infer_cfg=LEval_meetingsumm_infer_cfg,
eval_cfg=LEval_meetingsumm_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .LEval_multidocqa_gen_87dc85 import LEval_multidocqa_datasets # noqa: F401, F403

View File

@ -0,0 +1,40 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator
from opencompass.datasets import LEvalMultidocQADataset
LEval_multidocqa_reader_cfg = dict(
input_columns=['context', 'question'],
output_column='answer',
train_split='test',
test_split='test'
)
LEval_multidocqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(role='HUMAN', prompt='{context}\nQuestion: {question}?\nAnswer:'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=64)
)
LEval_multidocqa_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
pred_role='BOT'
)
LEval_multidocqa_datasets = [
dict(
type=LEvalMultidocQADataset,
abbr='LEval_multidocqa',
path='L4NLP/LEval',
name='multidoc_qa',
reader_cfg=LEval_multidocqa_reader_cfg,
infer_cfg=LEval_multidocqa_infer_cfg,
eval_cfg=LEval_multidocqa_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .LEval_narrativeqa_gen_9fec98 import LEval_narrativeqa_datasets # noqa: F401, F403

View File

@ -0,0 +1,40 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator
from opencompass.datasets import LEvalNarrativeQADataset
LEval_narrativeqa_reader_cfg = dict(
input_columns=['context', 'question'],
output_column='answer',
train_split='test',
test_split='test'
)
LEval_narrativeqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(role='HUMAN', prompt='{context}\nQuestion: {question}?\nAnswer:'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=50)
)
LEval_narrativeqa_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
pred_role='BOT'
)
LEval_narrativeqa_datasets = [
dict(
type=LEvalNarrativeQADataset,
abbr='LEval_narrativeqa',
path='L4NLP/LEval',
name='narrative_qa',
reader_cfg=LEval_narrativeqa_reader_cfg,
infer_cfg=LEval_narrativeqa_infer_cfg,
eval_cfg=LEval_narrativeqa_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .LEval_naturalquestion_gen_9fec98 import LEval_nq_datasets # noqa: F401, F403

View File

@ -0,0 +1,40 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator
from opencompass.datasets import LEvalNaturalQuestionDataset
LEval_nq_reader_cfg = dict(
input_columns=['context', 'question'],
output_column='answer',
train_split='test',
test_split='test'
)
LEval_nq_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(role='HUMAN', prompt='{context}\nQuestion: {question}?\nAnswer:'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=50)
)
LEval_nq_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
pred_role='BOT'
)
LEval_nq_datasets = [
dict(
type=LEvalNaturalQuestionDataset,
abbr='LEval_nq',
path='L4NLP/LEval',
name='natural_question',
reader_cfg=LEval_nq_reader_cfg,
infer_cfg=LEval_nq_infer_cfg,
eval_cfg=LEval_nq_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .LEval_newssumm_gen_db3565 import LEval_newssumm_datasets # noqa: F401, F403

View File

@ -0,0 +1,40 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator
from opencompass.datasets import LEvalNewsSummDataset
LEval_newssumm_reader_cfg = dict(
input_columns=['context', 'question'],
output_column='answer',
train_split='test',
test_split='test'
)
LEval_newssumm_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(role='HUMAN', prompt='{context}\n{question}\nTL;DR:'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512)
)
LEval_newssumm_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
pred_role='BOT'
)
LEval_newssumm_datasets = [
dict(
type=LEvalNewsSummDataset,
abbr='LEval_news_summ',
path='L4NLP/LEval',
name='news_summ',
reader_cfg=LEval_newssumm_reader_cfg,
infer_cfg=LEval_newssumm_infer_cfg,
eval_cfg=LEval_newssumm_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .LEval_paper_assistant_gen_6c03d0 import LEval_ps_summ_datasets # noqa: F401, F403

View File

@ -0,0 +1,40 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator
from opencompass.datasets import LEvalPaperAssistantDataset
LEval_ps_summ_reader_cfg = dict(
input_columns=['context', 'question'],
output_column='answer',
train_split='test',
test_split='test'
)
LEval_ps_summ_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(role='HUMAN', prompt='{context}\nQuestion: {question}\nAnswer:'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512)
)
LEval_ps_summ_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
pred_role='BOT'
)
LEval_ps_summ_datasets = [
dict(
type=LEvalPaperAssistantDataset,
abbr='LEval_paper_assistant',
path='L4NLP/LEval',
name='paper_assistant',
reader_cfg=LEval_ps_summ_reader_cfg,
infer_cfg=LEval_ps_summ_infer_cfg,
eval_cfg=LEval_ps_summ_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .LEval_patent_summ_gen_db3565 import LEval_patent_summ_datasets # noqa: F401, F403

View File

@ -0,0 +1,40 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator
from opencompass.datasets import LEvalPatentSummDataset
LEval_patent_summ_reader_cfg = dict(
input_columns=['context', 'question'],
output_column='answer',
train_split='test',
test_split='test'
)
LEval_patent_summ_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(role='HUMAN', prompt='{context}\n{question}\nTL;DR:'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512)
)
LEval_patent_summ_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
pred_role='BOT'
)
LEval_patent_summ_datasets = [
dict(
type=LEvalPatentSummDataset,
abbr='LEval_patent_summ',
path='L4NLP/LEval',
name='patent_summ',
reader_cfg=LEval_patent_summ_reader_cfg,
infer_cfg=LEval_patent_summ_infer_cfg,
eval_cfg=LEval_patent_summ_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .LEval_quality_gen_bd35f4 import LEval_quality_datasets # noqa: F401, F403

View File

@ -0,0 +1,42 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator, AccEvaluator
from opencompass.datasets import LEvalQualityDataset
from opencompass.utils.text_postprocessors import first_capital_postprocess, first_capital_postprocess_multi
LEval_quality_reader_cfg = dict(
input_columns=['context', 'question'],
output_column='answer',
train_split='test',
test_split='test'
)
LEval_quality_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(role='HUMAN', prompt='{context}\nQuestion: {question}\nAnswer:'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=10)
)
LEval_quality_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type=first_capital_postprocess),
pred_role='BOT'
)
LEval_quality_datasets = [
dict(
type=LEvalQualityDataset,
abbr='LEval_quality',
path='L4NLP/LEval',
name='quality',
reader_cfg=LEval_quality_reader_cfg,
infer_cfg=LEval_quality_infer_cfg,
eval_cfg=LEval_quality_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .LEval_review_summ_gen_6c03d0 import LEval_review_summ_datasets # noqa: F401, F403

View File

@ -0,0 +1,40 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator
from opencompass.datasets import LEvalReviewSummDataset
LEval_review_summ_reader_cfg = dict(
input_columns=['context', 'question'],
output_column='answer',
train_split='test',
test_split='test'
)
LEval_review_summ_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(role='HUMAN', prompt='{context}\nQuestion: {question}\nAnswer:'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512)
)
LEval_review_summ_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
pred_role='BOT'
)
LEval_review_summ_datasets = [
dict(
type=LEvalReviewSummDataset,
abbr='LEval_review_summ',
path='L4NLP/LEval',
name='review_summ',
reader_cfg=LEval_review_summ_reader_cfg,
infer_cfg=LEval_review_summ_infer_cfg,
eval_cfg=LEval_review_summ_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .LEval_scientificqa_gen_0c6e71 import LEval_scientificqa_datasets # noqa: F401, F403

View File

@ -0,0 +1,40 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator
from opencompass.datasets import LEvalScientificQADataset
LEval_scientificqa_reader_cfg = dict(
input_columns=['context', 'question'],
output_column='answer',
train_split='test',
test_split='test'
)
LEval_scientificqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(role='HUMAN', prompt='{context}\nQuestion: {question}\nAnswer:'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=64)
)
LEval_scientificqa_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
pred_role='BOT'
)
LEval_scientificqa_datasets = [
dict(
type=LEvalScientificQADataset,
abbr='LEval_scientificqa',
path='L4NLP/LEval',
name='scientific_qa',
reader_cfg=LEval_scientificqa_reader_cfg,
infer_cfg=LEval_scientificqa_infer_cfg,
eval_cfg=LEval_scientificqa_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .LEval_tpo_gen_bd35f4 import LEval_tpo_datasets

View File

@ -0,0 +1,42 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator, AccEvaluator
from opencompass.datasets import LEvalTPODataset
from opencompass.utils.text_postprocessors import first_capital_postprocess, first_capital_postprocess_multi
LEval_tpo_reader_cfg = dict(
input_columns=['context', 'question'],
output_column='answer',
train_split='test',
test_split='test'
)
LEval_tpo_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(role='HUMAN', prompt='{context}\nQuestion: {question}\nAnswer:'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=10)
)
LEval_tpo_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type=first_capital_postprocess),
pred_role='BOT'
)
LEval_tpo_datasets = [
dict(
type=LEvalTPODataset,
abbr='LEval_tpo',
path='L4NLP/LEval',
name='tpo',
reader_cfg=LEval_tpo_reader_cfg,
infer_cfg=LEval_tpo_infer_cfg,
eval_cfg=LEval_tpo_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .LEval_tvshow_summ_gen_rouge import LEval_tvshow_summ_datasets # noqa: F401, F403

View File

@ -0,0 +1,40 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator
from opencompass.datasets import LEvalTVShowSummDataset
LEval_tvshow_summ_reader_cfg = dict(
input_columns=['context', 'question'],
output_column='answer',
train_split='test',
test_split='test'
)
LEval_tvshow_summ_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(role='HUMAN', prompt='{context}\nQuestion: {question}'),
dict(role='BOT', prompt='TL;DR:'),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512)
)
LEval_tvshow_summ_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
pred_role='BOT'
)
LEval_tvshow_summ_datasets = [
dict(
type=LEvalTVShowSummDataset,
abbr='LEval_tvshow_summ',
path='L4NLP/LEval',
name='tv_show_summ',
reader_cfg=LEval_tvshow_summ_reader_cfg,
infer_cfg=LEval_tvshow_summ_infer_cfg,
eval_cfg=LEval_tvshow_summ_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .LEval_tvshow_summ_gen_049a5c import LEval_tvshow_summ_datasets # noqa: F401, F403

View File

@ -0,0 +1,40 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator
from opencompass.datasets import LEvalTVShowSummDataset
LEval_tvshow_summ_reader_cfg = dict(
input_columns=['context', 'question'],
output_column='answer',
train_split='test',
test_split='test'
)
LEval_tvshow_summ_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(role='HUMAN', prompt='{context}\nQuestion: {question}\nTL;DR:'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512)
)
LEval_tvshow_summ_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
pred_role='BOT'
)
LEval_tvshow_summ_datasets = [
dict(
type=LEvalTVShowSummDataset,
abbr='LEval_tvshow_summ',
path='L4NLP/LEval',
name='tv_show_summ',
reader_cfg=LEval_tvshow_summ_reader_cfg,
infer_cfg=LEval_tvshow_summ_infer_cfg,
eval_cfg=LEval_tvshow_summ_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .LEval_topic_retrieval_gen_af0562 import LEval_tr_datasets

View File

@ -0,0 +1,42 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator, AccEvaluator
from opencompass.datasets import LEvalTopicRetrievalDataset
from opencompass.utils.text_postprocessors import first_capital_postprocess, first_capital_postprocess_multi, general_postprocess
LEval_tr_reader_cfg = dict(
input_columns=['context', 'question'],
output_column='answer',
train_split='test',
test_split='test'
)
LEval_tr_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(role='HUMAN', prompt='{context}\nQuestion: {question}\nAnswer:'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=30)
)
LEval_tr_eval_cfg = dict(
evaluator=dict(type=EMEvaluator),
pred_postprocessor=dict(type=general_postprocess),
pred_role='BOT'
)
LEval_tr_datasets = [
dict(
type=LEvalTopicRetrievalDataset,
abbr='LEval_topic_retrieval',
path='L4NLP/LEval',
name='topic_retrieval_longchat',
reader_cfg=LEval_tr_reader_cfg,
infer_cfg=LEval_tr_infer_cfg,
eval_cfg=LEval_tr_eval_cfg)
]

View File

@ -1,4 +1,4 @@
from mmengine.config import read_base
with read_base():
from .agieval_gen_397d81 import agieval_datasets # noqa: F401, F403
from .agieval_gen_64afd3 import agieval_datasets # noqa: F401, F403

View File

@ -1,4 +1,4 @@
from mmengine.config import read_base
with read_base():
from .bbh_gen_5b92b0 import bbh_datasets # noqa: F401, F403
from .bbh_gen_6bd693 import bbh_datasets # noqa: F401, F403

View File

@ -1,4 +1,4 @@
from mmengine.config import read_base
with read_base():
from .cmmlu_gen_ffe7c0 import cmmlu_datasets # noqa: F401, F403
from .cmmlu_gen_c13365 import cmmlu_datasets # noqa: F401, F403

View File

@ -1,4 +1,4 @@
from mmengine.config import read_base
with read_base():
from .cmmlu_ppl_fd1f2f import cmmlu_datasets # noqa: F401, F403
from .cmmlu_ppl_8b9c76 import cmmlu_datasets # noqa: F401, F403

View File

@ -5,7 +5,7 @@ with read_base():
from ..ceval.ceval_ppl_578f8d import ceval_datasets
from ..agieval.agieval_mixed_2f14ad import agieval_datasets
from ..GaokaoBench.GaokaoBench_mixed_f2038e import GaokaoBench_datasets
from ..bbh.bbh_gen_5b92b0 import bbh_datasets
from ..bbh.bbh_gen_6bd693 import bbh_datasets
from ..humaneval.humaneval_gen_8e312c import humaneval_datasets
from ..mbpp.mbpp_gen_1e1056 import mbpp_datasets
from ..CLUE_C3.CLUE_C3_ppl_e24a31 import C3_datasets

View File

@ -2,7 +2,7 @@ from mmengine.config import read_base
with read_base():
from ..ceval.ceval_ppl_578f8d import ceval_datasets
from ..bbh.bbh_gen_5b92b0 import bbh_datasets
from ..bbh.bbh_gen_6bd693 import bbh_datasets
from ..CLUE_CMRC.CLUE_CMRC_gen_1bd3c8 import CMRC_datasets
from ..CLUE_DRCD.CLUE_DRCD_gen_1bd3c8 import DRCD_datasets
from ..CLUE_afqmc.CLUE_afqmc_ppl_6507d7 import afqmc_datasets

View File

@ -3,9 +3,9 @@ from mmengine.config import read_base
with read_base():
from ..mmlu.mmlu_gen_a484b3 import mmlu_datasets
from ..ceval.ceval_gen_5f30c7 import ceval_datasets
from ..agieval.agieval_gen_397d81 import agieval_datasets
from ..agieval.agieval_gen_64afd3 import agieval_datasets
from ..GaokaoBench.GaokaoBench_gen_5cfe9e import GaokaoBench_datasets
from ..bbh.bbh_gen_5b92b0 import bbh_datasets
from ..bbh.bbh_gen_6bd693 import bbh_datasets
from ..humaneval.humaneval_gen_8e312c import humaneval_datasets
from ..mbpp.mbpp_gen_1e1056 import mbpp_datasets
from ..CLUE_C3.CLUE_C3_gen_8c358f import C3_datasets

View File

@ -3,7 +3,7 @@ from mmengine.config import read_base
with read_base():
from ..mmlu.mmlu_gen_a484b3 import mmlu_datasets
from ..ceval.ceval_gen_5f30c7 import ceval_datasets
from ..bbh.bbh_gen_5b92b0 import bbh_datasets
from ..bbh.bbh_gen_6bd693 import bbh_datasets
from ..CLUE_CMRC.CLUE_CMRC_gen_1bd3c8 import CMRC_datasets
from ..CLUE_DRCD.CLUE_DRCD_gen_1bd3c8 import DRCD_datasets
from ..CLUE_afqmc.CLUE_afqmc_gen_901306 import afqmc_datasets
@ -35,6 +35,6 @@ with read_base():
from ..obqa.obqa_gen_9069e4 import obqa_datasets
from ..nq.nq_gen_c788f6 import nq_datasets
from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
from ..crowspairs.crowspairs_gen_21f7cb import crowspairs_datasets
from ..crowspairs.crowspairs_gen_381af0 import crowspairs_datasets
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])

View File

@ -1,4 +1,4 @@
from mmengine.config import read_base
with read_base():
from .crowspairs_gen_21f7cb import crowspairs_datasets # noqa: F401, F403
from .crowspairs_gen_381af0 import crowspairs_datasets # noqa: F401, F403

View File

@ -1,4 +1,4 @@
from mmengine.config import read_base
with read_base():
from .cvalues_responsibility_gen_4aec9f import cvalues_datasets # noqa: F401, F403
from .cvalues_responsibility_gen_543378 import cvalues_datasets # noqa: F401, F403

configs/eval_LEval.py
View File

@ -0,0 +1,48 @@
from mmengine.config import read_base
from opencompass.models import HuggingFaceCausalLM
import torch
# long context evaluation tasks
with read_base():
from .datasets.LEvalNaturalQuestion.LEval_naturalquestion_gen import LEval_nq_datasets
from .datasets.LEvalNarrativeQA.LEval_narrativeqa_gen import LEval_narrativeqa_datasets
from .datasets.LEvalMultidocQA.LEval_multidocqa_gen import LEval_multidocqa_datasets
from .datasets.LEvalCoursera.LEval_coursera_gen import LEval_coursera_datasets
from .datasets.LEvalTPO.LEval_tpo_gen import LEval_tpo_datasets
from .datasets.LEvalQuality.LEval_quality_gen import LEval_quality_datasets
from .datasets.LEvalGSM100.LEval_gsm100_gen import LEval_gsm100_datasets
from .datasets.LEvalTopicRetrieval.LEval_topic_retrieval_gen import LEval_tr_datasets
from .datasets.LEvalFinancialQA.LEval_financialqa_gen import LEval_financialqa_datasets
from .datasets.LEvalGovReportSumm.LEval_gov_report_summ_gen import LEval_govreport_summ_datasets
from .datasets.LEvalLegalContractQA.LEval_legalcontractqa_gen import LEval_legalqa_datasets
from .datasets.LEvalMeetingSumm.LEval_meetingsumm_gen import LEval_meetingsumm_datasets
from .datasets.LEvalNewsSumm.LEval_newssumm_gen import LEval_newssumm_datasets
from .datasets.LEvalPaperAssistant.LEval_paper_assistant_gen import LEval_ps_summ_datasets
from .datasets.LEvalPatentSumm.LEval_patent_summ_gen import LEval_patent_summ_datasets
from .datasets.LEvalTVShowSumm.LEval_tvshow_summ_gen import LEval_tvshow_summ_datasets
from .datasets.LEvalScientificQA.LEval_scientificqa_gen import LEval_scientificqa_datasets
from .datasets.LEvalReviewSumm.LEval_review_summ_gen import LEval_review_summ_datasets
# choose a model of interest
# InternLM-7B is used here as an example
from .models.hf_internlm_7b import models
# and output the results in a chosen format
from .summarizers.LEval import summarizer
datasets = [*LEval_coursera_datasets,
*LEval_tpo_datasets,
*LEval_quality_datasets,
*LEval_gsm100_datasets,
*LEval_tr_datasets,
*LEval_financialqa_datasets,
*LEval_govreport_summ_datasets,
*LEval_legalqa_datasets,
*LEval_meetingsumm_datasets,
*LEval_multidocqa_datasets,
*LEval_narrativeqa_datasets,
*LEval_nq_datasets,
*LEval_newssumm_datasets,
*LEval_patent_summ_datasets,
*LEval_tvshow_summ_datasets,
*LEval_scientificqa_datasets,
*LEval_review_summ_datasets,
*LEval_ps_summ_datasets]
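
With the model and summarizer imported above, this config can be passed directly to the OpenCompass entry point; a minimal launch sketch (assuming the repository root as the working directory):

python run.py configs/eval_LEval.py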

View File

@ -0,0 +1,29 @@
summarizer = dict(
dataset_abbrs = [
'--------- LEval Exact Match (Acc) ---------', # category
"LEval_coursera",
'LEval_gsm100',
'LEval_quality',
"LEval_tpo",
'LEval_topic_retrieval',
'--------- LEval Gen (ROUGE) ---------', # category
'LEval_financialqa',
'LEval_gov_report_summ',
'LEval_legal_contract_qa',
'LEval_meeting_summ',
'LEval_multidocqa',
'LEval_narrativeqa',
'LEval_nq',
'LEval_news_summ',
'LEval_paper_assistant',
'LEval_patent_summ',
'LEval_review_summ',
'LEval_scientificqa',
'LEval_tvshow_summ'
],
summary_groups=sum([v for k, v in locals().items() if k.endswith("_summary_groups")], []),
prompt_db=dict(
database_path='configs/datasets/log.json',
config_dir='configs/datasets',
blacklist='.promptignore'),
)

View File

@ -0,0 +1,27 @@
from datasets import Dataset, load_dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class LEvalCourseraDataset(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
split = 'test'
raw_data = []
for i in range(len(dataset[split])):
instructions = dataset[split]['instructions'][i]
outputs = dataset[split]['outputs'][i]
context = dataset[split]['input'][i]
for question, answer in zip(instructions, outputs):
raw_data.append({
'question': question,
'context': context,
'answer': answer
})
dataset[split] = Dataset.from_list(raw_data)
return dataset
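
Each LEval loader below follows this same pattern: it pulls one record per document from the Hugging Face hub and flattens the paired instructions/outputs lists into one row per question. A minimal usage sketch (assuming network access to the L4NLP/LEval hub dataset referenced in the configs above):

from opencompass.datasets import LEvalCourseraDataset

ds = LEvalCourseraDataset.load(path='L4NLP/LEval', name='coursera')
print(ds['test'][0]['question'])  # one flattened question/context/answer row per example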

View File

@ -0,0 +1,27 @@
from datasets import Dataset, load_dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class LEvalFinancialQADataset(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
split = 'test'
raw_data = []
for i in range(len(dataset[split])):
instructions = dataset[split]['instructions'][i]
outputs = dataset[split]['outputs'][i]
context = dataset[split]['input'][i]
for question, answer in zip(instructions, outputs):
raw_data.append({
'question': question,
'context': context,
'answer': answer
})
dataset[split] = Dataset.from_list(raw_data)
return dataset

View File

@ -0,0 +1,27 @@
from datasets import Dataset, load_dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class LEvalGovReportSummDataset(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
split = 'test'
raw_data = []
for i in range(len(dataset[split])):
instructions = dataset[split]['instructions'][i]
outputs = dataset[split]['outputs'][i]
context = dataset[split]['input'][i]
for question, answer in zip(instructions, outputs):
raw_data.append({
'question': question,
'context': context,
'answer': answer
})
dataset[split] = Dataset.from_list(raw_data)
return dataset

View File

@ -0,0 +1,58 @@
from datasets import Dataset, load_dataset
from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS
from .base import BaseDataset
@TEXT_POSTPROCESSORS.register_module('gsm100_dataset')
def gsm100_dataset_postprocess(text: str) -> str:
return text.replace(',', '')
@TEXT_POSTPROCESSORS.register_module('gsm100')
def gsm100_postprocess(text: str) -> str:
# text = text.split('\n\n')[0]
segs = text.split('The answer is')
if len(segs) < 2:
return ''
text = segs[1]
text = text.split(' ')
flag = False
ret = ''
for i in range(len(text)):
s = text[i]
for i in range(len(s)):
if s[i].isdigit():
flag = True
ret = s
break
if flag:
break
ret1 = ''
for i in range(len(ret)):
if ret[i].isdigit():
ret1 += ret[i]
return ret1
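# Sketch of the intended behaviour of the two postprocessors above
# (illustrative strings, assuming the 'The answer is' marker produced by
# the GSM-style prompts):
#   gsm100_dataset_postprocess('1,234')                 -> '1234'
#   gsm100_postprocess('... The answer is 42 apples.')  -> '42'
#   gsm100_postprocess('output without the marker')     -> ''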
@LOAD_DATASET.register_module()
class LEvalGSM100Dataset(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
split = 'test'
raw_data = []
for i in range(len(dataset[split])):
instructions = dataset[split]['instructions'][i]
outputs = dataset[split]['outputs'][i]
context = dataset[split]['input'][i]
for question, answer in zip(instructions, outputs):
raw_data.append({
'question': question,
'context': context,
'answer': answer
})
dataset[split] = Dataset.from_list(raw_data)
return dataset

View File

@ -0,0 +1,27 @@
from datasets import Dataset, load_dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class LEvalLegalContractQADataset(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
split = 'test'
raw_data = []
for i in range(len(dataset[split])):
instructions = dataset[split]['instructions'][i]
outputs = dataset[split]['outputs'][i]
context = dataset[split]['input'][i]
for question, answer in zip(instructions, outputs):
raw_data.append({
'question': question,
'context': context,
'answer': answer
})
dataset[split] = Dataset.from_list(raw_data)
return dataset

View File

@ -0,0 +1,27 @@
from datasets import Dataset, load_dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class LEvalMeetingSummDataset(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
split = 'test'
raw_data = []
for i in range(len(dataset[split])):
instructions = dataset[split]['instructions'][i]
outputs = dataset[split]['outputs'][i]
context = dataset[split]['input'][i]
for question, answer in zip(instructions, outputs):
raw_data.append({
'question': question,
'context': context,
'answer': answer
})
dataset[split] = Dataset.from_list(raw_data)
return dataset

View File

@ -0,0 +1,27 @@
from datasets import Dataset, load_dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class LEvalMultidocQADataset(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
split = 'test'
raw_data = []
for i in range(len(dataset[split])):
instructions = dataset[split]['instructions'][i]
outputs = dataset[split]['outputs'][i]
context = dataset[split]['input'][i]
for question, answer in zip(instructions, outputs):
raw_data.append({
'question': question,
'context': context,
'answer': answer
})
dataset[split] = Dataset.from_list(raw_data)
return dataset

View File

@ -0,0 +1,27 @@
from datasets import Dataset, load_dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class LEvalNarrativeQADataset(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
split = 'test'
raw_data = []
for i in range(len(dataset[split])):
instructions = dataset[split]['instructions'][i]
outputs = dataset[split]['outputs'][i]
context = dataset[split]['input'][i]
for question, answer in zip(instructions, outputs):
raw_data.append({
'question': question,
'context': context,
'answer': answer
})
dataset[split] = Dataset.from_list(raw_data)
return dataset

View File

@ -0,0 +1,27 @@
from datasets import Dataset, load_dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class LEvalNaturalQuestionDataset(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
split = 'test'
raw_data = []
for i in range(len(dataset[split])):
instructions = dataset[split]['instructions'][i]
outputs = dataset[split]['outputs'][i]
context = dataset[split]['input'][i]
for question, answer in zip(instructions, outputs):
raw_data.append({
'question': question,
'context': context,
'answer': answer
})
dataset[split] = Dataset.from_list(raw_data)
return dataset

View File

@ -0,0 +1,27 @@
from datasets import Dataset, load_dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class LEvalNewsSummDataset(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
split = 'test'
raw_data = []
for i in range(len(dataset[split])):
instructions = dataset[split]['instructions'][i]
outputs = dataset[split]['outputs'][i]
context = dataset[split]['input'][i]
for question, answer in zip(instructions, outputs):
raw_data.append({
'question': question,
'context': context,
'answer': answer
})
dataset[split] = Dataset.from_list(raw_data)
return dataset

View File

@ -0,0 +1,27 @@
from datasets import Dataset, load_dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class LEvalPaperAssistantDataset(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
split = 'test'
raw_data = []
for i in range(len(dataset[split])):
instructions = dataset[split]['instructions'][i]
outputs = dataset[split]['outputs'][i]
context = dataset[split]['input'][i]
for question, answer in zip(instructions, outputs):
raw_data.append({
'question': question,
'context': context,
'answer': answer
})
dataset[split] = Dataset.from_list(raw_data)
return dataset

View File

@ -0,0 +1,27 @@
from datasets import Dataset, load_dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class LEvalPatentSummDataset(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
split = 'test'
raw_data = []
for i in range(len(dataset[split])):
instructions = dataset[split]['instructions'][i]
outputs = dataset[split]['outputs'][i]
context = dataset[split]['input'][i]
for question, answer in zip(instructions, outputs):
raw_data.append({
'question': question,
'context': context,
'answer': answer
})
dataset[split] = Dataset.from_list(raw_data)
return dataset

View File

@ -0,0 +1,27 @@
from datasets import Dataset, load_dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class LEvalQualityDataset(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
split = 'test'
raw_data = []
for i in range(len(dataset[split])):
instructions = dataset[split]['instructions'][i]
outputs = dataset[split]['outputs'][i]
context = dataset[split]['input'][i]
for question, answer in zip(instructions, outputs):
raw_data.append({
'question': question,
'context': context,
'answer': answer[1]
})
dataset[split] = Dataset.from_list(raw_data)
return dataset

View File

@ -0,0 +1,27 @@
from datasets import Dataset, load_dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class LEvalReviewSummDataset(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
split = 'test'
raw_data = []
for i in range(len(dataset[split])):
instructions = dataset[split]['instructions'][i]
outputs = dataset[split]['outputs'][i]
context = dataset[split]['input'][i]
for question, answer in zip(instructions, outputs):
raw_data.append({
'question': question,
'context': context,
'answer': answer
})
dataset[split] = Dataset.from_list(raw_data)
return dataset

View File

@ -0,0 +1,27 @@
from datasets import Dataset, load_dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class LEvalScientificQADataset(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
split = 'test'
raw_data = []
for i in range(len(dataset[split])):
instructions = dataset[split]['instructions'][i]
outputs = dataset[split]['outputs'][i]
context = dataset[split]['input'][i]
for question, answer in zip(instructions, outputs):
raw_data.append({
'question': question,
'context': context,
'answer': answer
})
dataset[split] = Dataset.from_list(raw_data)
return dataset

View File

@ -0,0 +1,27 @@
from datasets import Dataset, load_dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class LEvalTopicRetrievalDataset(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
split = 'test'
raw_data = []
for i in range(len(dataset[split])):
instructions = dataset[split]['instructions'][i]
outputs = dataset[split]['outputs'][i]
context = dataset[split]['input'][i]
for question, answer in zip(instructions, outputs):
raw_data.append({
'question': question,
'context': context,
'answer': answer
})
dataset[split] = Dataset.from_list(raw_data)
return dataset

View File

@ -0,0 +1,27 @@
from datasets import Dataset, load_dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class LEvalTPODataset(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
split = 'test'
raw_data = []
for i in range(len(dataset[split])):
instructions = dataset[split]['instructions'][i]
outputs = dataset[split]['outputs'][i]
context = dataset[split]['input'][i]
for question, answer in zip(instructions, outputs):
raw_data.append({
'question': question,
'context': context,
'answer': answer
})
dataset[split] = Dataset.from_list(raw_data)
return dataset

View File

@ -0,0 +1,27 @@
from datasets import Dataset, load_dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class LEvalTVShowSummDataset(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
split = 'test'
raw_data = []
for i in range(len(dataset[split])):
instructions = dataset[split]['instructions'][i]
outputs = dataset[split]['outputs'][i]
context = dataset[split]['input'][i]
for question, answer in zip(instructions, outputs):
raw_data.append({
'question': question,
'context': context,
'answer': answer
})
dataset[split] = Dataset.from_list(raw_data)
return dataset

View File

@ -34,6 +34,24 @@ from .iwslt2017 import * # noqa: F401, F403
from .jigsawmultilingual import * # noqa: F401, F403
from .lambada import * # noqa: F401, F403
from .lcsts import * # noqa: F401, F403
from .LEval_coursera import * # noqa: F401, F403
from .LEval_financial_qa import * # noqa: F401, F403
from .LEval_gov_report_summ import * # noqa: F401, F403
from .LEval_gsm100 import * # noqa: F401, F403
from .LEval_legal_contract_qa import * # noqa: F401, F403
from .LEval_meeting_summ import * # noqa: F401, F403
from .LEval_multidoc_qa import * # noqa: F401, F403
from .LEval_narrattive_qa import * # noqa: F401, F403
from .LEval_natural_question import * # noqa: F401, F403
from .LEval_news_summ import * # noqa: F401, F403
from .LEval_paper_assistant import * # noqa: F401, F403
from .LEval_patent_summ import * # noqa: F401, F403
from .LEval_quality import * # noqa: F401, F403
from .LEval_review_summ import * # noqa: F401, F403
from .LEval_scientific_qa import * # noqa: F401, F403
from .LEval_topic_retrieval import * # noqa: F401, F403
from .LEval_tpo import * # noqa: F401, F403
from .LEval_tvshow_summ import * # noqa: F401, F403
from .math import * # noqa: F401, F403
from .mbpp import * # noqa: F401, F403
from .mmlu import * # noqa: F401, F403