[Feat] support opencompass

yingfhu 2023-07-04 22:11:33 +08:00
parent 7d346000bb
commit fb11108723
81 changed files with 1859 additions and 0 deletions

View File

@ -0,0 +1,33 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import ARCDataset
ARC_c_reader_cfg = dict(
input_columns=['question', 'textA', 'textB', 'textC', 'textD'],
output_column='answerKey')
ARC_c_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
"A": "Question: {question}\nAnswer: {textA}",
"B": "Question: {question}\nAnswer: {textB}",
"C": "Question: {question}\nAnswer: {textC}",
"D": "Question: {question}\nAnswer: {textD}"
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
ARC_c_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
ARC_c_datasets = [
dict(
type=ARCDataset,
abbr='ARC-c',
path='./data/ARC/ARC-c/ARC-Challenge-Dev.jsonl',
reader_cfg=ARC_c_reader_cfg,
infer_cfg=ARC_c_infer_cfg,
eval_cfg=ARC_c_eval_cfg)
]
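
For PPL-style configs like the one above, the {label: prompt} template enumerates one fully rendered prompt per answer key, and the option whose prompt the model scores with the lowest perplexity becomes the prediction. A minimal sketch of that selection step, using a hypothetical `ppl_of` scoring callable and a made-up ARC-style item:

def pick_label_by_ppl(ppl_of):
    """Pick the answer whose rendered prompt receives the lowest perplexity."""
    # Made-up ARC-style item; field names mirror the reader_cfg above.
    item = {"question": "Which gas do plants absorb?", "textA": "Oxygen",
            "textB": "Carbon dioxide", "textC": "Nitrogen", "textD": "Helium"}
    templates = {
        "A": "Question: {question}\nAnswer: {textA}",
        "B": "Question: {question}\nAnswer: {textB}",
        "C": "Question: {question}\nAnswer: {textC}",
        "D": "Question: {question}\nAnswer: {textD}",
    }
    rendered = {label: tpl.format(**item) for label, tpl in templates.items()}
    return min(rendered, key=lambda label: ppl_of(rendered[label]))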

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .CLUE_C3_gen_9e3de9 import C3_datasets # noqa: F401, F403

View File

@ -0,0 +1,50 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import C3Dataset_V2
C3_reader_cfg = dict(
input_columns=[
"question",
"content",
"choice0",
"choice1",
"choice2",
"choice3",
"choices",
],
output_column="label",
)
C3_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
"{content}\n问:{question}\nA. {choice0}\nB. {choice1}\nC. {choice2}\nD. {choice3}\n请从“A”“B”“C”“D”中进行选择。\n答:",
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
C3_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
C3_datasets = [
dict(
abbr="C3",
type=C3Dataset_V2,
path="./data/CLUE/C3/dev_0.json",
reader_cfg=C3_reader_cfg,
infer_cfg=C3_infer_cfg,
eval_cfg=C3_eval_cfg,
)
]
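
The `first-capital` post-processor named above reduces a free-form generation to a single option letter before accuracy is computed. A rough, self-contained sketch of that behaviour (the real implementation lives in opencompass's text post-processors):

import re

def first_capital_postprocess(text: str) -> str:
    # Keep only the first capital letter, so an answer like "答:B。因为……" is scored as "B".
    match = re.search(r'[A-Z]', text)
    return match.group(0) if match else ''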

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .CLUE_DRCD_gen_03b96b import DRCD_datasets # noqa: F401, F403

View File

@ -0,0 +1,42 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import AFQMCDataset_V2
afqmc_reader_cfg = dict(
input_columns=["sentence1", "sentence2"],
output_column="label",
test_split="train")
afqmc_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
"语句一:“{sentence1}\n语句二:“{sentence2}\n语句一与语句二是关于蚂蚁金融产品的疑问,两者所询问的内容是否完全一致?\nA. 不完全一致\nB. 完全一致\n请从“A”“B”中进行选择。\n答:",
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
afqmc_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
afqmc_datasets = [
dict(
abbr="afqmc-dev",
type=AFQMCDataset_V2,
path="./data/CLUE/AFQMC/dev.json",
reader_cfg=afqmc_reader_cfg,
infer_cfg=afqmc_infer_cfg,
eval_cfg=afqmc_eval_cfg,
),
]

View File

@ -0,0 +1,34 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
afqmc_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'],
output_column='label',
test_split='train')
afqmc_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0: "{sentence1},{sentence2}不同。",
1: "{sentence1},{sentence2}相似。"
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
afqmc_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
afqmc_datasets = [
dict(
type=HFDataset,
abbr='afqmc-dev',
path='json',
data_files='./data/CLUE/AFQMC/dev.json',
split='train',
reader_cfg=afqmc_reader_cfg,
infer_cfg=afqmc_infer_cfg,
eval_cfg=afqmc_eval_cfg),
]

View File

@ -0,0 +1,44 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
afqmc_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'],
output_column='label',
test_split='train')
afqmc_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0:
dict(round=[
dict(
role="HUMAN", prompt="{sentence1}”与“{sentence2}”不同还是相似?"),
dict(role="BOT", prompt="不同。")
]),
1:
dict(round=[
dict(
role="HUMAN", prompt="{sentence1}”与“{sentence2}”不同还是相似?"),
dict(role="BOT", prompt="相似")
]),
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
afqmc_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
afqmc_datasets = [
dict(
type=HFDataset,
abbr='afqmc-dev',
path='json',
data_files='./data/CLUE/AFQMC/dev.json',
split='train',
reader_cfg=afqmc_reader_cfg,
infer_cfg=afqmc_infer_cfg,
eval_cfg=afqmc_eval_cfg),
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .CLUE_cmnli_gen_316313 import cmnli_datasets # noqa: F401, F403

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .CLUE_ocnli_gen_7c44b0 import ocnli_datasets # noqa: F401, F403

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .FewCLUE_chid_ppl_b6cd88 import chid_datasets # noqa: F401, F403

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .FewCLUE_cluewsc_gen_276956 import cluewsc_datasets # noqa: F401, F403

View File

@ -0,0 +1,50 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CluewscDataset_V2
cluewsc_reader_cfg = dict(
input_columns=["span1", "span2", "text", "new_text"],
output_column="label",
)
cluewsc_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
"{text}\n此处,“{span2}”是否指代“{span1}“?\nA. 是\nB. 否\n请从”A“”B“中进行选择。\n答:",
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
cluewsc_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
cluewsc_datasets = [
dict(
abbr="cluewsc-dev",
type=CluewscDataset_V2,
path="./data/FewCLUE/cluewsc/dev_few_all.json",
reader_cfg=cluewsc_reader_cfg,
infer_cfg=cluewsc_infer_cfg,
eval_cfg=cluewsc_eval_cfg,
),
dict(
abbr="cluewsc-test",
type=CluewscDataset_V2,
path="./data/FewCLUE/cluewsc/test_public.json",
reader_cfg=cluewsc_reader_cfg,
infer_cfg=cluewsc_infer_cfg,
eval_cfg=cluewsc_eval_cfg,
),
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .FewCLUE_csl_gen_1b0c02 import csl_datasets # noqa: F401, F403

View File

@ -0,0 +1,41 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CslDataset
csl_reader_cfg = dict(
input_columns=["abst", "keywords"], output_column='label')
csl_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0: "摘要:{abst}",
1: "摘要:{abst}\n关键词:{keywords}"
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
csl_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
csl_datasets = [
dict(
type=CslDataset,
path='json',
abbr='csl_dev',
data_files='./data/FewCLUE/csl/dev_few_all.json',
split='train',
reader_cfg=csl_reader_cfg,
infer_cfg=csl_infer_cfg,
eval_cfg=csl_eval_cfg),
dict(
type=CslDataset,
path='json',
abbr='csl_test',
data_files='./data/FewCLUE/csl/test_public.json',
split='train',
reader_cfg=csl_reader_cfg,
infer_cfg=csl_infer_cfg,
eval_cfg=csl_eval_cfg)
]

View File

@ -0,0 +1,49 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import cmnliDataset_V2
ocnli_fc_reader_cfg = dict(
input_columns=["sentence1", "sentence2"],
output_column="label",
test_split="train")
ocnli_fc_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
"阅读文章:{sentence1}\n根据上文,回答如下问题:{sentence2}\nA. 对\nB. 错\nC. 可能\n请从“A”“B”“C”中进行选择。\n答:"
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
ocnli_fc_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
ocnli_fc_datasets = [
dict(
abbr="ocnli_fc-dev",
type=cmnliDataset_V2, # ocnli_fc share the same format with cmnli
path="./data/FewCLUE/ocnli/dev_few_all.json",
reader_cfg=ocnli_fc_reader_cfg,
infer_cfg=ocnli_fc_infer_cfg,
eval_cfg=ocnli_fc_eval_cfg,
),
dict(
abbr="ocnli_fc-test",
type=cmnliDataset_V2, # ocnli_fc share the same format with cmnli
path="./data/FewCLUE/ocnli/test_public.json",
reader_cfg=ocnli_fc_reader_cfg,
infer_cfg=ocnli_fc_infer_cfg,
eval_cfg=ocnli_fc_eval_cfg,
),
]

View File

@ -0,0 +1,48 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import TNewsDataset
tnews_reader_cfg = dict(input_columns='sentence', output_column='label_desc2')
tnews_labels = [
'农业新闻', '旅游新闻', '游戏新闻', '科技类别公司新闻', '体育类别新闻', '初升高教育新闻', '娱乐圈新闻', '投资资讯',
'军事类别常识', '车辆新闻', '楼市新闻', '环球不含中国类别新闻', '书籍文化历史类别新闻', '故事类别新闻', '股票市场类别新闻'
]
tnews_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
lb: dict(round=[
dict(role='HUMAN', prompt='以下内容属于什么新闻:{sentence}'),
dict(role='BOT', prompt=lb)
])
for lb in tnews_labels
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
tnews_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
tnews_datasets = [
dict(
type=TNewsDataset,
path='json',
abbr='tnews-dev',
data_files='./data/FewCLUE/tnews/dev_few_all.json',
split='train',
reader_cfg=tnews_reader_cfg,
infer_cfg=tnews_infer_cfg,
eval_cfg=tnews_eval_cfg),
dict(
type=TNewsDataset,
path='json',
abbr='tnews-test',
data_files='./data/FewCLUE/tnews/test_public.json',
split='train',
reader_cfg=tnews_reader_cfg,
infer_cfg=tnews_infer_cfg,
eval_cfg=tnews_eval_cfg)
]

View File

@ -0,0 +1,53 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
AX_b_reader_cfg = dict(
input_columns=["sentence1", "sentence2"],
output_column="label",
test_split="train")
AX_b_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
"entailment":
dict(round=[
dict(
role="HUMAN",
prompt=
"{sentence1}\n{sentence2}\nIs the sentence below entailed by the sentence above?"
),
dict(role="BOT", prompt="Yes"),
]),
"not_entailment":
dict(round=[
dict(
role="HUMAN",
prompt=
"{sentence1}\n{sentence2}\nIs the sentence below entailed by the sentence above?"
),
dict(role="BOT", prompt="No"),
])
},
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer),
)
AX_b_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
AX_b_datasets = [
dict(
type=HFDataset,
abbr="AX_b",
path="json",
data_files="./data/SuperGLUE/AX-b/AX-b.jsonl",
split="train",
reader_cfg=AX_b_reader_cfg,
infer_cfg=AX_b_infer_cfg,
eval_cfg=AX_b_eval_cfg,
)
]

View File

@ -0,0 +1,45 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import BoolQDataset
BoolQ_reader_cfg = dict(
input_columns=["question", "passage"],
output_column="answer",
test_split="train")
BoolQ_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0:
dict(round=[
dict(role="HUMAN", prompt="{passage}\nQuestion: {question}?"),
dict(role="BOT", prompt="No"),
]),
1:
dict(round=[
dict(role="HUMAN", prompt="{passage}\nQuestion: {question}?"),
dict(role="BOT", prompt="Yes"),
]),
},
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer),
)
BoolQ_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
BoolQ_datasets = [
dict(
type=BoolQDataset,
abbr="BoolQ",
path="json",
data_files="./data/SuperGLUE/BoolQ/val.jsonl",
split="train",
reader_cfg=BoolQ_reader_cfg,
infer_cfg=BoolQ_infer_cfg,
eval_cfg=BoolQ_eval_cfg,
)
]

View File

@ -0,0 +1,33 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
CB_reader_cfg = dict(
input_columns=['premise', 'hypothesis'], output_column='label')
CB_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
'contradiction': '{premise}?contradiction, {hypothesis}',
'entailment': '{premise}?entailment, {hypothesis}',
'neutral': '{premise}?neutral, {hypothesis}'
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
CB_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )
CB_datasets = [
dict(
type=HFDataset,
abbr='CB',
path='json',
split='train',
data_files='./data/SuperGLUE/CB/val.jsonl',
reader_cfg=CB_reader_cfg,
infer_cfg=CB_infer_cfg,
eval_cfg=CB_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .SuperGLUE_COPA_gen_6d5e67 import COPA_datasets # noqa: F401, F403

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .SuperGLUE_COPA_ppl_ddb78c import COPA_datasets # noqa: F401, F403

View File

@ -0,0 +1,34 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
COPA_reader_cfg = dict(
input_columns=['question', 'premise', 'choice1', 'choice2'],
output_column='label',
test_split='train')
COPA_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0: "Premise:{premise}\nQuestion:{question}\nAnswer: {choice1}.",
1: "Passage:{premise}\nQuestion:{question}\nAnswer: {choice2}.",
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
COPA_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
COPA_datasets = [
dict(
type=HFDataset,
abbr='COPA',
path='json',
data_files='./data/SuperGLUE/COPA/val.jsonl',
split='train',
reader_cfg=COPA_reader_cfg,
infer_cfg=COPA_infer_cfg,
eval_cfg=COPA_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .SuperGLUE_MultiRC_gen_26c9dc import MultiRC_datasets # noqa: F401, F403

View File

@ -0,0 +1,30 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import MultiRCDataset
MultiRC_reader_cfg = dict(
input_columns=['question', 'text', 'answer'], output_column='label')
MultiRC_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0: "Passage:{text}\nQuestion:{question}\nAnswer: {answer}. It is false.",
1: "Passage:</P>。\nQuestion:{question}\nAnswer: {answer}. It is true.",
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
MultiRC_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
MultiRC_datasets = [
dict(
type=MultiRCDataset,
abbr='MultiRC',
path='./data/SuperGLUE/MultiRC/val.jsonl',
reader_cfg=MultiRC_reader_cfg,
infer_cfg=MultiRC_infer_cfg,
eval_cfg=MultiRC_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .SuperGLUE_RTE_ppl_29a22c import RTE_datasets # noqa: F401, F403

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .SuperGLUE_ReCoRD_gen_d8f19c import ReCoRD_datasets # noqa: F401, F403

View File

@ -0,0 +1,42 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator
from opencompass.datasets import ReCoRDDataset
ReCoRD_reader_cfg = dict(
input_columns=["question", "text"],
output_column="answers",
)
ReCoRD_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
"Passage: {text}\nResult: {question}\nQuestion: What entity does ____ refer to in the result? Give me the entity name:"
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
ReCoRD_eval_cfg = dict(
evaluator=dict(type=EMEvaluator),
pred_role='BOT',
pred_postprocessor=dict(type="ReCoRD"),
)
ReCoRD_datasets = [
dict(
type=ReCoRDDataset,
abbr="ReCoRD",
path="./data/SuperGLUE/ReCoRD/val.jsonl",
reader_cfg=ReCoRD_reader_cfg,
infer_cfg=ReCoRD_infer_cfg,
eval_cfg=ReCoRD_eval_cfg,
)
]

View File

@ -0,0 +1,51 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import WSCDataset_V2
WSC_reader_cfg = dict(
input_columns=["span1", "span2", "text"],
output_column="label",
)
WSC_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
'A':
dict(round=[
dict(
role="HUMAN",
prompt=
"{text}\nIs '{span1}' and '{span2}' refers to the same entity in the above sentence?"
),
dict(role='BOT', prompt='Yes'),
]),
'B':
dict(round=[
dict(
role="HUMAN",
prompt=
"{text}\nIs '{span1}' and '{span2}' refers to the same entity in the above sentence?"
),
dict(role='BOT', prompt='No'),
]),
},
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer),
)
WSC_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )
WSC_datasets = [
dict(
abbr="WSC",
type=WSCDataset_V2,
path="./data/SuperGLUE/WSC/val.jsonl",
reader_cfg=WSC_reader_cfg,
infer_cfg=WSC_infer_cfg,
eval_cfg=WSC_eval_cfg,
)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .XLSum_gen_1cc5f6 import XLSum_datasets # noqa: F401, F403

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .bbh_gen_58abc3 import bbh_datasets # noqa: F401, F403

View File

@ -0,0 +1,39 @@
from mmengine.config import read_base
with read_base():
from ..ceval.ceval_ppl_275812 import ceval_datasets
from ..bbh.bbh_gen_58abc3 import bbh_datasets
from ..CLUE_CMRC.CLUE_CMRC_gen_72a8d5 import CMRC_datasets
from ..CLUE_DRCD.CLUE_DRCD_gen_03b96b import DRCD_datasets
from ..CLUE_afqmc.CLUE_afqmc_ppl_c83c36 import afqmc_datasets
from ..FewCLUE_bustm.FewCLUE_bustm_ppl_47f2ab import bustm_datasets
from ..FewCLUE_chid.FewCLUE_chid_ppl_b6cd88 import chid_datasets
from ..FewCLUE_cluewsc.FewCLUE_cluewsc_ppl_2a9e61 import cluewsc_datasets
from ..FewCLUE_eprstmt.FewCLUE_eprstmt_ppl_d3c387 import eprstmt_datasets
from ..humaneval.humaneval_gen_d428f1 import humaneval_datasets
from ..mbpp.mbpp_gen_4104e4 import mbpp_datasets
from ..lambada.lambada_gen_7ffe3d import lambada_datasets
from ..storycloze.storycloze_ppl_c1912d import storycloze_datasets
from ..SuperGLUE_AX_b.SuperGLUE_AX_b_ppl_4bd960 import AX_b_datasets
from ..SuperGLUE_AX_g.SuperGLUE_AX_g_ppl_8d9bf9 import AX_g_datasets
from ..SuperGLUE_BoolQ.SuperGLUE_BoolQ_ppl_f80fb0 import BoolQ_datasets
from ..SuperGLUE_CB.SuperGLUE_CB_ppl_32adbb import CB_datasets
from ..SuperGLUE_COPA.SuperGLUE_COPA_ppl_ddb78c import COPA_datasets
from ..SuperGLUE_MultiRC.SuperGLUE_MultiRC_ppl_83a304 import MultiRC_datasets
from ..SuperGLUE_RTE.SuperGLUE_RTE_ppl_29a22c import RTE_datasets
from ..SuperGLUE_ReCoRD.SuperGLUE_ReCoRD_gen_d8f19c import ReCoRD_datasets
from ..SuperGLUE_WiC.SuperGLUE_WiC_ppl_4118db import WiC_datasets
from ..SuperGLUE_WSC.SuperGLUE_WSC_ppl_85f45f import WSC_datasets
from ..race.race_ppl_04e06a import race_datasets
from ..math.math_gen_78bcba import math_datasets
from ..gsm8k.gsm8k_gen_2dd372 import gsm8k_datasets
from ..summedits.summedits_ppl_163352 import summedits_datasets
from ..hellaswag.hellaswag_ppl_8e07d6 import hellaswag_datasets
from ..piqa.piqa_ppl_788dbe import piqa_datasets
from ..winogrande.winogrande_ppl_00f8ad import winogrande_datasets
from ..obqa.obqa_ppl_2b5b12 import obqa_datasets
from ..nq.nq_gen_c00b89 import nq_datasets
from ..triviaqa.triviaqa_gen_cc3cbf import triviaqa_datasets
from ..crowspairs.crowspairs_ppl_f60797 import crowspairs_datasets
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
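
The last line above collects every imported `*_datasets` list into one `datasets` list through `locals()`. A tiny standalone illustration of the same idiom, using hypothetical names:

demo_a_datasets = [dict(abbr='demo-a')]
demo_b_datasets = [dict(abbr='demo-b')]
# Every name ending in `_datasets` is concatenated in definition order.
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
print([d['abbr'] for d in datasets])  # ['demo-a', 'demo-b']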

View File

@ -0,0 +1,38 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import NaturalQuestionDataset, NQEvaluator
nq_reader_cfg = dict(
input_columns=['question'], output_column='answer', train_split='test')
nq_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template="Q: </Q>?\nA: </A>",
column_token_map={
'question': '</Q>',
'answer': '</A>'
}),
prompt_template=dict(
type=PromptTemplate,
template="</E>Question: </Q>? Answer: ",
column_token_map={
'question': '</Q>',
'answer': '</A>'
},
ice_token='</E>'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
nq_eval_cfg = dict(evaluator=dict(type=NQEvaluator))
nq_datasets = [
dict(
type=NaturalQuestionDataset,
abbr='nq',
path='./data/nq/',
reader_cfg=nq_reader_cfg,
infer_cfg=nq_infer_cfg,
eval_cfg=nq_eval_cfg)
]

View File

@ -0,0 +1,41 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import TriviaQADataset, TriviaQAEvaluator
triviaqa_reader_cfg = dict(
input_columns=['question'],
output_column='answer',
train_split='dev',
test_split='dev')
triviaqa_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template='Q: </Q>\nA: </A>',
column_token_map={
'question': '</Q>',
'answer': '</A>'
}),
prompt_template=dict(
type=PromptTemplate,
template='</E>Question: </Q> Answer:',
column_token_map={
'question': '</Q>',
'answer': '</A>'
},
ice_token='</E>'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=50))
triviaqa_eval_cfg = dict(evaluator=dict(type=TriviaQAEvaluator))
triviaqa_datasets = [
dict(
type=TriviaQADataset,
abbr='triviaqa',
path='./data/triviaqa/',
reader_cfg=triviaqa_reader_cfg,
infer_cfg=triviaqa_infer_cfg,
eval_cfg=triviaqa_eval_cfg)
]

View File

@ -0,0 +1,40 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator
humaneval_reader_cfg = dict(
input_columns=['prompt'], output_column='task_id', train_split='test')
# TODO: allow empty output-column
humaneval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin=[
dict(
role='SYSTEM',
fallback_role='HUMAN',
prompt='Complete the following python code:'),
],
round=[
dict(role='HUMAN', prompt='{prompt}'),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
humaneval_eval_cfg = dict(
evaluator=dict(type=HumanEvaluator),
pred_role='BOT',
k=[1, 10, 100],  # pass@k values; this parameter is specific to HumanEval
pred_postprocessor=dict(type='humaneval'),
)
humaneval_datasets = [
dict(
type=HFDataset,
path='openai_humaneval',
reader_cfg=humaneval_reader_cfg,
infer_cfg=humaneval_infer_cfg,
eval_cfg=humaneval_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .jigsawmultilingual_ppl_640128 import jigsawmultilingual_datasets # noqa: F401, F403

View File

@ -0,0 +1,47 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import CLPInferencer
from opencompass.openicl.icl_evaluator import AUCROCEvaluator
from opencompass.datasets import JigsawMultilingualDataset
jigsawmultilingual_reader_cfg = dict(
input_columns=['text'],
output_column='label',
train_split='test',
test_split='test')
# The Jigsaw multilingual toxic dataset is not available on Hugging Face.
# Please download the test files `test.csv` and `test_labels.csv` from the
# link below and move them to the data/ folder to perform evaluation.
# https://www.kaggle.com/competitions/jigsaw-multilingual-toxic-comment-classification/data
jigsawmultilingual_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt="Text: {text}\nQuestion: Does the above text contain "
"rude, hateful, aggressive, disrespectful or unreasonable "
"language?\nAnswer:")
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=CLPInferencer))
jigsawmultilingual_eval_cfg = dict(evaluator=dict(type=AUCROCEvaluator), )
lang = ['es', 'fr', 'it', 'pt', 'ru', 'tr']
jigsawmultilingual_datasets = []
for _l in lang:
jigsawmultilingual_datasets.append(
dict(
abbr=f'jigsaw_multilingual_{_l}',
type=JigsawMultilingualDataset,
path='data/test.csv',
label='data/test_labels.csv',
lang=_l,
reader_cfg=jigsawmultilingual_reader_cfg,
infer_cfg=jigsawmultilingual_infer_cfg,
eval_cfg=jigsawmultilingual_eval_cfg))
del lang, _l

View File

@ -0,0 +1,43 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import CLPInferencer
from opencompass.openicl.icl_evaluator import AUCROCEvaluator
from opencompass.datasets import JigsawMultilingualDataset
jigsawmultilingual_reader_cfg = dict(
input_columns=['text'],
output_column='label',
train_split='test',
test_split='test')
# The Jigsaw multilingual toxic dataset is not available on Hugging Face.
# Please download the test files `test.csv` and `test_labels.csv` from the
# link below and move them to the data/ folder to perform evaluation.
# https://www.kaggle.com/competitions/jigsaw-multilingual-toxic-comment-classification/data
jigsawmultilingual_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template='Text: {text}\nQuestion: '
'Does the above text contain rude, hateful, aggressive, disrespectful '
'or unreasonable language?\nAnswer:'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=CLPInferencer))
jigsawmultilingual_eval_cfg = dict(evaluator=dict(type=AUCROCEvaluator), )
lang = ['es', 'fr', 'it', 'pt', 'ru', 'tr']
jigsawmultilingual_datasets = []
for _l in lang:
jigsawmultilingual_datasets.append(
dict(
abbr=f'jigsaw_multilingual_{_l}',
type=JigsawMultilingualDataset,
path='data/test.csv',
label='data/test_labels.csv',
lang=_l,
reader_cfg=jigsawmultilingual_reader_cfg,
infer_cfg=jigsawmultilingual_infer_cfg,
eval_cfg=jigsawmultilingual_eval_cfg))
del lang, _l

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .lambada_gen_7ffe3d import lambada_datasets # noqa: F401, F403

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .math_gen_78bcba import math_datasets # noqa: F401, F403

View File

@ -0,0 +1,53 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import MATHDataset, MATHEvaluator
math_reader_cfg = dict(input_columns=['problem'], output_column='solution')
math_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=r'''Problem:
Find the domain of the expression $\frac{{\sqrt{{x-2}}}}{{\sqrt{{5-x}}}}$.
Solution:
The expressions inside each square root must be non-negative. Therefore, $x-2 \ge 0$, so $x\ge2$, and $5 - x \ge 0$, so $x \le 5$. Also, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of the expression is $\boxed{{[2,5)}}$.
Final Answer: The final answer is $[2,5)$. I hope it is correct.
Problem:
If $\det \mathbf{{A}} = 2$ and $\det \mathbf{{B}} = 12,$ then find $\det (\mathbf{{A}} \mathbf{{B}}).$
Solution:
We have that $\det (\mathbf{{A}} \mathbf{{B}}) = (\det \mathbf{{A}})(\det \mathbf{{B}}) = (2)(12) = \boxed{{24}}.$
Final Answer: The final answer is $24$. I hope it is correct.
Problem:
Terrell usually lifts two 20-pound weights 12 times. If he uses two 15-pound weights instead, how many times must Terrell lift them in order to lift the same total weight?
Solution:
If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\cdot 12\cdot20=480$ pounds of weight. If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\cdot15\cdot n=30n$ pounds of weight. Equating this to 480 pounds, we can solve for $n$: \begin{{align*}} 30n&=480\\ \Rightarrow\qquad n&=480/30=\boxed{{16}} \end{{align*}}
Final Answer: The final answer is $16$. I hope it is correct.
Problem:
If the system of equations: \begin{{align*}} 6x-4y&=a,\\ 6y-9x &=b. \end{{align*}}has a solution $(x, y)$ where $x$ and $y$ are both nonzero, find $\frac{{a}}{{b}},$ assuming $b$ is nonzero.
Solution:
If we multiply the first equation by $-\frac{{3}}{{2}}$, we obtain $$6y-9x=-\frac{{3}}{{2}}a.$$Since we also know that $6y-9x=b$, we have $$-\frac{{3}}{{2}}a=b\Rightarrow\frac{{a}}{{b}}=\boxed{{-\frac{{2}}{{3}}}}.$$
Final Answer: The final answer is $-\frac{{2}}{{3}}$. I hope it is correct.
Problem:
{problem}
Solution:
{solution}'''),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
math_eval_cfg = dict(
evaluator=dict(type=MATHEvaluator), pred_postprocessor=dict(type='math'))
math_datasets = [
dict(
type=MATHDataset,
abbr='math',
path='./data/math/math.json',
reader_cfg=math_reader_cfg,
infer_cfg=math_infer_cfg,
eval_cfg=math_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .narrativeqa_gen_5786a7 import narrativeqa_datasets # noqa: F401, F403

View File

@ -0,0 +1,29 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import NaturalQuestionDataset, NQEvaluator
nq_reader_cfg = dict(
input_columns=['question'], output_column='answer', train_split='test')
nq_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(role='HUMAN', prompt='Question: {question}?\nAnswer: '),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
nq_eval_cfg = dict(evaluator=dict(type=NQEvaluator), pred_role="BOT")
nq_datasets = [
dict(
type=NaturalQuestionDataset,
abbr='nq',
path='./data/nq/',
reader_cfg=nq_reader_cfg,
infer_cfg=nq_infer_cfg,
eval_cfg=nq_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .piqa_gen_8287ae import piqa_datasets # noqa: F401, F403

View File

@ -0,0 +1,31 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
piqa_reader_cfg = dict(
input_columns=['goal', 'sol1', 'sol2'],
output_column='label',
test_split='validation')
piqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0: 'The following makes sense: \nQ: {goal}\nA: {sol1}\n',
1: 'The following makes sense: \nQ: {goal}\nA: {sol2}\n'
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
piqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
piqa_datasets = [
dict(
type=HFDataset,
path='piqa',
reader_cfg=piqa_reader_cfg,
infer_cfg=piqa_infer_cfg,
eval_cfg=piqa_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .qabench_gen_0d5967 import qabench_datasets # noqa: F401, F403

View File

@ -0,0 +1,46 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import RaceDataset
race_reader_cfg = dict(
input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
output_column='answer')
race_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
"Read the article, and answer the question by replying A, B, C or D.\n\nArticle:\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}"
),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
race_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='first-capital'),
pred_role='BOT')
race_datasets = [
dict(
type=RaceDataset,
abbr='race-middle',
path='race',
name='middle',
reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg,
eval_cfg=race_eval_cfg),
dict(
type=RaceDataset,
abbr='race-high',
path='race',
name='high',
reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg,
eval_cfg=race_eval_cfg)
]

View File

@ -0,0 +1,40 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import RaceDataset
race_reader_cfg = dict(
input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
output_column='answer')
race_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=
'Read the article, and answer the question by replying A, B, C or D.\n\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
race_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='first-capital'))
race_datasets = [
dict(
type=RaceDataset,
abbr='race-middle',
path='race',
name='middle',
reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg,
eval_cfg=race_eval_cfg),
dict(
type=RaceDataset,
abbr='race-high',
path='race',
name='high',
reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg,
eval_cfg=race_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .race_ppl_04e06a import race_datasets # noqa: F401, F403

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .realtoxicprompts_gen_3ea730 import realtoxicprompts_datasets # noqa: F401, F403

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .safety_gen_c0a5b8 import safety_datasets # noqa: F401, F403

View File

@ -0,0 +1,42 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import siqaDataset_V2
siqa_reader_cfg = dict(
input_columns=["context", "question", "answerA", "answerB", "answerC"],
output_column="label",
test_split="validation")
siqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(
role="HUMAN",
prompt=
"{context}\nQuestion: {question}\nA. {answerA}\nB. {answerB}\nC. {answerC}\nAnswer:"
)
], ),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
siqa_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
siqa_datasets = [
dict(
abbr="siqa",
type=siqaDataset_V2,
path="social_i_qa",
reader_cfg=siqa_reader_cfg,
infer_cfg=siqa_infer_cfg,
eval_cfg=siqa_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .siqa_ppl_049da0 import siqa_datasets # noqa: F401, F403

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .storycloze_ppl_c1912d import storycloze_datasets # noqa: F401, F403

View File

@ -0,0 +1,36 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import storyclozeDataset
storycloze_reader_cfg = dict(
input_columns=['context', 'sentence_quiz1', 'sentence_quiz2'],
output_column='answer_right_ending',
train_split='test',
test_split='test')
storycloze_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
1: "{context}{sentence_quiz1}",
2: "{context}{sentence_quiz2}",
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
storycloze_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
# The original Story Cloze dataset and repo are no longer maintained,
# so the multilingual version of this dataset is used instead.
storycloze_datasets = [
dict(
abbr='story_cloze',
type=storyclozeDataset,
path='juletxara/xstory_cloze',
name='en',
reader_cfg=storycloze_reader_cfg,
infer_cfg=storycloze_infer_cfg,
eval_cfg=storycloze_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .summedits_gen_4f35b5 import summedits_datasets # noqa: F401, F403

View File

@ -0,0 +1,37 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import SummeditsDataset_V2
summedits_reader_cfg = dict(
input_columns=['doc', 'summary'], output_column='label')
summedits_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
'Document:\n{doc}\nSummary:\n{summary}\nQuestion:\nIs the summary factually consistent with the document?\nA. Yes\nB. No\nAnswer:'
),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
summedits_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
summedits_datasets = [
dict(
abbr='summedits',
type=SummeditsDataset_V2,
path='./data/summedits/summedits.jsonl',
reader_cfg=summedits_reader_cfg,
infer_cfg=summedits_infer_cfg,
eval_cfg=summedits_eval_cfg)
]

View File

@ -0,0 +1,30 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import TriviaQArcDataset, TriviaQAEvaluator
triviaqarc_reader_cfg = dict(
input_columns=['question', 'evidence'],
output_column='answer',
train_split='dev',
test_split='dev')
triviaqarc_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template="{evidence}\nAnswer these questions:\nQ: {question}?\nA:"),
retriever=dict(type=ZeroRetriever),
inferencer=dict(
type=GenInferencer, max_out_len=50, max_seq_len=8192, batch_size=4))
triviaqarc_eval_cfg = dict(evaluator=dict(type=TriviaQAEvaluator))
triviaqarc_datasets = [
dict(
type=TriviaQArcDataset,
abbr='triviaqarc',
path='./data/triviaqa-rc/',
reader_cfg=triviaqarc_reader_cfg,
infer_cfg=triviaqarc_infer_cfg,
eval_cfg=triviaqarc_eval_cfg)
]

View File

@ -0,0 +1,29 @@
from opencompass.models import HuggingFaceCausalLM
_meta_template = dict(
round=[
dict(role='HUMAN', begin='\n\n### Instruction:\n'),
dict(role='BOT', begin='\n\n### Response:\n', generate=True),
],
)
models = [
dict(
type=HuggingFaceCausalLM,
abbr='TigerBot-SFT',
path="TigerResearch/tigerbot-7b-sft",
tokenizer_path='TigerResearch/tigerbot-7b-sft',
tokenizer_kwargs=dict(
padding_side='left',
truncation_side='left',
trust_remote_code=True,
),
max_out_len=100,
max_seq_len=2048,
batch_size=8,
meta_template=_meta_template,
model_kwargs=dict(trust_remote_code=True, device_map='auto', revision='0ba4d6fc479bdedd6a3f8d4d3425025c5f501800'),
run_cfg=dict(num_gpus=1, num_procs=1),
)
]
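
Roughly speaking, the meta template above has the prompt builder prefix each turn with the model's instruction-tuning markers before generation; a hand-rendered example of a single round under that template (illustrative only, the actual assembly is done by opencompass's prompt builder):

# A HUMAN turn "Translate 'hello' into French." would be assembled as:
#   "\n\n### Instruction:\nTranslate 'hello' into French.\n\n### Response:\n"
# and the model is asked to generate after the Response marker.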

View File

@ -0,0 +1,6 @@
bbh_summary_groups = []
# bbh
_bbh = [
    'temporal_sequences', 'disambiguation_qa', 'date_understanding',
    'tracking_shuffled_objects_three_objects', 'penguins_in_a_table',
    'geometric_shapes', 'snarks', 'ruin_names',
    'tracking_shuffled_objects_seven_objects',
    'tracking_shuffled_objects_five_objects', 'logical_deduction_three_objects',
    'hyperbaton', 'logical_deduction_five_objects',
    'logical_deduction_seven_objects', 'movie_recommendation',
    'salient_translation_error_detection', 'reasoning_about_colored_objects',
    'multistep_arithmetic_two', 'navigate', 'dyck_languages', 'word_sorting',
    'sports_understanding', 'boolean_expressions', 'object_counting',
    'formal_fallacies', 'causal_judgement', 'web_of_lies'
]
_bbh = ['bbh-' + s for s in _bbh]
bbh_summary_groups.append({'name': 'bbh', 'subsets': _bbh})

View File

@ -0,0 +1,18 @@
{% extends "layout.html" %}
{% block body %}
<h1>Page Not Found</h1>
<p>
The page you are looking for cannot be found.
</p>
<p>
If you just switched documentation versions, it is likely that the page you were on has been moved. You can look for it in
the table of contents on the left, or go to <a href="{{ pathto(root_doc) }}">the homepage</a>.
</p>
<!-- <p>
If you cannot find documentation you want, please <a
href="">open an issue</a> to tell us!
</p> -->
{% endblock %}

View File

@ -0,0 +1 @@
# New Dataset

View File

@ -0,0 +1 @@
# Few-shot

View File

@ -0,0 +1,2 @@
# Learning About Configuration Files

View File

@ -0,0 +1 @@
# Overview

View File

@ -0,0 +1,27 @@
import re
from datasets import load_dataset
from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS
from .base import BaseDataset
@LOAD_DATASET.register_module()
class TheoremQADataset(BaseDataset):
@staticmethod
def load(path: str):
return load_dataset('csv', data_files={'test': path})
@TEXT_POSTPROCESSORS.register_module('TheoremQA')
def TheoremQA_postprocess(text: str) -> str:
text = text.strip().split('\n')[0].strip()
matches = re.findall(r'answer is (.*)', text)
if len(matches) == 0:
return text
else:
text = matches[0].strip()[:-1]
return text
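
For reference, the post-processor above keeps only the first line of the generation and, when an "answer is" marker is found, returns what follows it minus the trailing character (typically a period); illustrative inputs, not taken from the dataset:

print(TheoremQA_postprocess('Therefore the answer is 42.\nExplanation follows.'))  # -> '42'
print(TheoremQA_postprocess('No explicit marker here'))  # -> 'No explicit marker here'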

View File

@ -0,0 +1,25 @@
import json
from datasets import Dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class CBDataset_V2(BaseDataset):
@staticmethod
def load(path):
dataset = []
with open(path, 'r') as f:
for line in f:
line = json.loads(line)
line['label'] = {
'contradiction': 'A',
'entailment': 'B',
'neutral': 'C'
}[line['label']]
dataset.append(line)
return Dataset.from_list(dataset)

View File

@ -0,0 +1,43 @@
import json
from datasets import Dataset, load_dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class CHIDDataset(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
def preprocess(example):
content = example['content']
for i, c in enumerate(example['candidates']):
example[f'content{i}'] = content.replace('#idiom#', c)
return example
dataset = dataset.map(preprocess)
return dataset
@LOAD_DATASET.register_module()
class CHIDDataset_V2(BaseDataset):
@staticmethod
def load(path):
data = []
with open(path, 'r') as f:
for line in f:
line = json.loads(line)
item = {}
item['content'] = line['content'].replace('#idiom#', '______')
for i, c in enumerate(line['candidates']):
item[chr(ord('A') + i)] = c
item['answer'] = 'ABCDEFG'[line['answer']]
data.append(item)
return Dataset.from_list(data)
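
As an illustration of the mapping performed by `CHIDDataset_V2.load` (the input line below is made up, not taken from the FewCLUE data):

# Made-up input line:
#   {'content': '他做事#idiom#,从不拖泥带水。',
#    'candidates': ['雷厉风行', '优柔寡断', '杞人忧天'],
#    'answer': 0}
# becomes:
#   {'content': '他做事______,从不拖泥带水。',
#    'A': '雷厉风行', 'B': '优柔寡断', 'C': '杞人忧天', 'answer': 'A'}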

View File

@ -0,0 +1,36 @@
from datasets import DatasetDict, load_dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class CivilCommentsDataset(BaseDataset):
@staticmethod
def load(**kwargs):
train_dataset = load_dataset(**kwargs, split='train')
test_dataset = load_dataset(**kwargs, split='test')
def pre_process(example):
example['label'] = int(example['toxicity'] >= 0.5)
example['choices'] = ['no', 'yes']
return example
def remove_columns(dataset):
return dataset.remove_columns([
'severe_toxicity', 'obscene', 'threat', 'insult',
'identity_attack', 'sexual_explicit'
])
train_dataset = remove_columns(train_dataset)
test_dataset = remove_columns(test_dataset)
test_dataset = test_dataset.shuffle(seed=42)
test_dataset = test_dataset.select(list(range(10000)))
test_dataset = test_dataset.map(pre_process)
return DatasetDict({
'train': train_dataset,
'test': test_dataset,
})

View File

@ -0,0 +1,22 @@
from datasets import load_dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class commonsenseqaDataset(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
def pre_process(example):
for i in range(5):
example[chr(ord('A') + i)] = example['choices']['text'][i]
return example
dataset = dataset.map(pre_process).remove_columns(
['question_concept', 'id', 'choices'])
return dataset

View File

@ -0,0 +1,34 @@
from datasets import load_dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class crowspairsDataset(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
def preprocess(example):
example['label'] = 0
return example
return dataset.map(preprocess)
@LOAD_DATASET.register_module()
class crowspairsDataset_V2(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
def preprocess(example):
example['label'] = 'A'
return example
return dataset.map(preprocess)

View File

@ -0,0 +1,27 @@
import json
from datasets import Dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class eprstmtDataset_V2(BaseDataset):
@staticmethod
def load(path):
data = []
with open(path, 'r') as f:
for line in f:
line = json.loads(line)
item = {
'sentence': line['sentence'],
'label': {
'Positive': 'A',
'Negative': 'B',
}[line['label']],
}
data.append(item)
return Dataset.from_list(data)

View File

@ -0,0 +1,13 @@
from datasets import load_dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class HFDataset(BaseDataset):
@staticmethod
def load(**kwargs):
return load_dataset(**kwargs)

View File

@ -0,0 +1,25 @@
from datasets import load_dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class piqaDataset_V2(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
def preprocess(example):
assert isinstance(example['label'], int)
if example['label'] < 0:
example['answer'] = 'NULL'
else:
example['answer'] = 'AB'[example['label']]
example.pop('label')
return example
dataset = dataset.map(preprocess)
return dataset

View File

@ -0,0 +1,30 @@
from datasets import load_dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class RealToxicPromptsDataset(BaseDataset):
@staticmethod
def load(**kwargs):
challenging_subset = kwargs.pop('challenging_subset', False)
dataset = load_dataset(**kwargs)
def preprocess(example):
for k, v in example['prompt'].items():
k = 'prompt_' + k
example[k] = v
del example['prompt']
return example
dataset = dataset.map(preprocess)
# return challenging subset if necessary
if challenging_subset:
return dataset.filter(lambda example: example['challenging'])
return dataset

View File

@ -0,0 +1,20 @@
from datasets import load_dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class siqaDataset_V2(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
def preprocess(example):
example['label'] = ' ABC'[int(example['label'])]
return example
dataset = dataset.map(preprocess)
return dataset

View File

@ -0,0 +1,5 @@
from .icl_aucroc_evaluator import AUCROCEvaluator
from .icl_base_evaluator import BaseEvaluator
from .icl_em_evaluator import EMEvaluator
from .icl_hf_evaluator import * # noqa
from .icl_toxic_evaluator import ToxicEvaluator

View File

@ -0,0 +1,40 @@
import logging
import torch.distributed as dist
LOG_LEVEL = logging.INFO
SUBPROCESS_LOG_LEVEL = logging.ERROR
LOG_FORMATTER = '[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s'
def get_logger(name, level=LOG_LEVEL, log_file=None, file_mode='w'):
formatter = logging.Formatter(LOG_FORMATTER)
logger = logging.getLogger(name)
for handler in logger.root.handlers:
if type(handler) is logging.StreamHandler:
handler.setLevel(logging.ERROR)
if dist.is_available() and dist.is_initialized():
rank = dist.get_rank()
else:
rank = 0
if rank == 0 and log_file is not None:
file_handler = logging.FileHandler(log_file, file_mode)
file_handler.setFormatter(formatter)
file_handler.setLevel(level)
logger.addHandler(file_handler)
if rank == 0:
logger.setLevel(level)
else:
logger.setLevel(SUBPROCESS_LOG_LEVEL)
stream_handler = logging.StreamHandler()
stream_handler.setFormatter(formatter)
stream_handler.setLevel(level)
logger.addHandler(stream_handler)
return logger
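
Typical usage of the helper above; on the rank-0 process both the stream handler and, when `log_file` is given, a file handler are attached:

logger = get_logger('opencompass', log_file='opencompass.log')
logger.info('evaluation started')  # emitted to the console and written to opencompass.log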

View File

@ -0,0 +1,2 @@
from .naive import * # noqa: F401, F403
from .size import * # noqa: F401, F403

View File

@ -0,0 +1,10 @@
from .abbr import * # noqa
from .build import * # noqa
from .fileio import * # noqa
from .git import * # noqa
from .lark import * # noqa
from .logging import * # noqa
from .menu import * # noqa
from .prompt import * # noqa
from .summarizer import * # noqa
from .text_postprocessors import * # noqa

View File

@ -0,0 +1,22 @@
import copy
from mmengine.config import ConfigDict
from opencompass.registry import LOAD_DATASET, MODELS
def build_dataset_from_cfg(dataset_cfg: ConfigDict) -> ConfigDict:
dataset_cfg = copy.deepcopy(dataset_cfg)
dataset_cfg.pop('infer_cfg', None)
dataset_cfg.pop('eval_cfg', None)
dataset_cfg.pop('abbr', None)
return LOAD_DATASET.build(dataset_cfg)
def build_model_from_cfg(model_cfg: ConfigDict) -> ConfigDict:
model_cfg = copy.deepcopy(model_cfg)
model_cfg.pop('run_cfg', None)
model_cfg.pop('max_out_len', None)
model_cfg.pop('batch_size', None)
model_cfg.pop('abbr', None)
return MODELS.build(model_cfg)

View File

@ -0,0 +1,45 @@
from typing import Dict, List, Union
from datasets import Dataset, DatasetDict
def _check_type_list(obj, typelist: List):
for _type in typelist:
if _type is None:
if obj is None:
return obj
elif isinstance(obj, _type):
return obj
raise TypeError(
f'Expected an object in {[_.__name__ if _ is not None else None for _ in typelist]} type, but got {obj}'
)
def _check_dataset(obj) -> Union[Dataset, DatasetDict]:
if isinstance(obj, Dataset) or isinstance(obj, DatasetDict):
return obj
else:
raise TypeError(
f'Expected a datasets.Dataset or a datasets.DatasetDict object, but got {obj}'
)
def _check_list(obj) -> List:
if isinstance(obj, List):
return obj
else:
raise TypeError(f'Expected a List object, but got {obj}')
def _check_str(obj) -> str:
if isinstance(obj, str):
return obj
else:
raise TypeError(f'Expected a str object, but got {obj}')
def _check_dict(obj) -> Dict:
if isinstance(obj, Dict):
return obj
else:
raise TypeError(f'Expected a Dict object, but got {obj}')
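
Example use of the guards above, with illustrative values:

ids = _check_type_list([1, 2, 3], [None, list])  # returns the list unchanged
split = _check_str('train')                      # returns 'train'
# _check_str(42) would raise TypeError("Expected a str object, but got 42")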