mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
parent
2d0b184bb6
commit
86d5ec3d0f
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import ARCDataset
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
ARC_c_reader_cfg = dict(
|
||||
input_columns=["question", "textA", "textB", "textC", "textD"],
|
||||
@ -27,7 +28,7 @@ ARC_c_infer_cfg = dict(
|
||||
ARC_c_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type="first-capital"),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
ARC_c_datasets = [
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import ARCDataset
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
ARC_e_reader_cfg = dict(
|
||||
input_columns=["question", "textA", "textB", "textC", "textD"],
|
||||
@ -27,7 +28,7 @@ ARC_e_infer_cfg = dict(
|
||||
ARC_e_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type="first-capital"),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
ARC_e_datasets = [
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import C3Dataset_V2
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
C3_reader_cfg = dict(
|
||||
input_columns=[
|
||||
@ -35,7 +36,7 @@ C3_infer_cfg = dict(
|
||||
C3_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type="first-capital"),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
C3_datasets = [
|
||||
|
@ -1,4 +1,4 @@
|
||||
from mmengine.config import read_base
|
||||
|
||||
with read_base():
|
||||
from .CLUE_CMRC_gen_941108 import CMRC_datasets # noqa: F401, F403
|
||||
from .CLUE_CMRC_gen_1bd3c8 import CMRC_datasets # noqa: F401, F403
|
||||
|
35
configs/datasets/CLUE_CMRC/CLUE_CMRC_gen_1bd3c8.py
Normal file
35
configs/datasets/CLUE_CMRC/CLUE_CMRC_gen_1bd3c8.py
Normal file
@ -0,0 +1,35 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import EMEvaluator
|
||||
from opencompass.datasets import CMRCDataset, cmrc_postprocess
|
||||
|
||||
CMRC_reader_cfg = dict(
|
||||
input_columns=['question', 'context'], output_column='answers')
|
||||
|
||||
CMRC_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=dict(round=[
|
||||
dict(
|
||||
role="HUMAN",
|
||||
prompt="根据文章回答问题。你的答案应该尽可能简练,请以 ‘答案是’ 开头的句式作答。\n文章:{context}\n问:{question}\n答:"),
|
||||
])),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer))
|
||||
|
||||
CMRC_eval_cfg = dict(
|
||||
evaluator=dict(type=EMEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type=cmrc_postprocess),
|
||||
)
|
||||
|
||||
CMRC_datasets = [
|
||||
dict(
|
||||
type=CMRCDataset,
|
||||
abbr='CMRC_dev',
|
||||
path='./data/CLUE/CMRC/dev.json',
|
||||
reader_cfg=CMRC_reader_cfg,
|
||||
infer_cfg=CMRC_infer_cfg,
|
||||
eval_cfg=CMRC_eval_cfg),
|
||||
]
|
@ -1,4 +1,4 @@
|
||||
from mmengine.config import read_base
|
||||
|
||||
with read_base():
|
||||
from .CLUE_DRCD_gen_941108 import DRCD_datasets # noqa: F401, F403
|
||||
from .CLUE_DRCD_gen_1bd3c8 import DRCD_datasets # noqa: F401, F403
|
||||
|
36
configs/datasets/CLUE_DRCD/CLUE_DRCD_gen_1bd3c8.py
Normal file
36
configs/datasets/CLUE_DRCD/CLUE_DRCD_gen_1bd3c8.py
Normal file
@ -0,0 +1,36 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import EMEvaluator
|
||||
from opencompass.datasets import DRCDDataset, drcd_postprocess
|
||||
|
||||
DRCD_reader_cfg = dict(
|
||||
input_columns=['question', 'context'], output_column='answers')
|
||||
|
||||
DRCD_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=dict(round=[
|
||||
dict(
|
||||
role="HUMAN",
|
||||
prompt="根据文章回答问题。你的答案应该尽可能简练,请以 ‘答案是’ 开头的句式作答。\n文章:{context}\n问:{question}\n答:"),
|
||||
])),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer))
|
||||
|
||||
DRCD_eval_cfg = dict(
|
||||
evaluator=dict(type=EMEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type=drcd_postprocess),
|
||||
|
||||
)
|
||||
|
||||
DRCD_datasets = [
|
||||
dict(
|
||||
type=DRCDDataset,
|
||||
abbr='DRCD_dev',
|
||||
path='./data/CLUE/DRCD/dev.json',
|
||||
reader_cfg=DRCD_reader_cfg,
|
||||
infer_cfg=DRCD_infer_cfg,
|
||||
eval_cfg=DRCD_eval_cfg),
|
||||
]
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import AFQMCDataset_V2
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
afqmc_reader_cfg = dict(
|
||||
input_columns=["sentence1", "sentence2"],
|
||||
@ -27,7 +28,7 @@ afqmc_infer_cfg = dict(
|
||||
afqmc_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type="first-capital"),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
afqmc_datasets = [
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import cmnliDataset_V2
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
cmnli_reader_cfg = dict(
|
||||
input_columns=["sentence1", "sentence2"],
|
||||
@ -27,7 +28,7 @@ cmnli_infer_cfg = dict(
|
||||
cmnli_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type="first-capital"),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
cmnli_datasets = [
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import cmnliDataset_V2
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
cmnli_reader_cfg = dict(
|
||||
input_columns=["sentence1", "sentence2"],
|
||||
@ -27,7 +28,7 @@ cmnli_infer_cfg = dict(
|
||||
cmnli_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type="first-capital"),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
cmnli_datasets = [
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import cmnliDataset_V2
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
ocnli_reader_cfg = dict(
|
||||
input_columns=["sentence1", "sentence2"],
|
||||
@ -28,7 +29,7 @@ ocnli_infer_cfg = dict(
|
||||
ocnli_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type="first-capital"),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
ocnli_datasets = [
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import cmnliDataset_V2
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
ocnli_reader_cfg = dict(
|
||||
input_columns=["sentence1", "sentence2"],
|
||||
@ -28,7 +29,7 @@ ocnli_infer_cfg = dict(
|
||||
ocnli_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type="first-capital"),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
ocnli_datasets = [
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import AFQMCDataset_V2
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
bustm_reader_cfg = dict(
|
||||
input_columns=["sentence1", "sentence2"],
|
||||
@ -27,7 +28,7 @@ bustm_infer_cfg = dict(
|
||||
bustm_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type="first-capital"),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
bustm_datasets = [
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import CHIDDataset_V2
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
chid_reader_cfg = dict(
|
||||
input_columns=["content","A","B","C","D","E","F","G"],
|
||||
@ -27,7 +28,7 @@ chid_infer_cfg = dict(
|
||||
chid_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type="first-capital"),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
chid_datasets = [
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import CluewscDataset_V2
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
cluewsc_reader_cfg = dict(
|
||||
input_columns=["span1", "span2", "text", "new_text"],
|
||||
@ -27,7 +28,7 @@ cluewsc_infer_cfg = dict(
|
||||
cluewsc_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type="first-capital"),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
cluewsc_datasets = [
|
||||
|
@ -1,4 +1,4 @@
|
||||
from mmengine.config import read_base
|
||||
|
||||
with read_base():
|
||||
from .FewCLUE_csl_gen_87f4a8 import csl_datasets # noqa: F401, F403
|
||||
from .FewCLUE_csl_gen_28b223 import csl_datasets # noqa: F401, F403
|
||||
|
51
configs/datasets/FewCLUE_csl/FewCLUE_csl_gen_28b223.py
Normal file
51
configs/datasets/FewCLUE_csl/FewCLUE_csl_gen_28b223.py
Normal file
@ -0,0 +1,51 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import CslDataset_V2
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
csl_reader_cfg = dict(
|
||||
input_columns=["abst", "keywords"],
|
||||
output_column="label",
|
||||
)
|
||||
|
||||
csl_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=dict(round=[
|
||||
dict(
|
||||
role="HUMAN",
|
||||
prompt=
|
||||
"摘要是对论文内容不加注释和评论的简短陈述,要求扼要地说明研究工作的目的、研究方法和最终结论等。\n关键词是一篇学术论文的核心词汇,一般由一系列名词组成。关键词在全文中应有较高出现频率,且能起到帮助文献检索的作用。\n摘要:{abst}\n关键词:{keywords}\n请问上述关键词是否匹配摘要且符合要求?\nA. 否\nB. 是\n请从”A“,”B“中进行选择。\n答:"
|
||||
)
|
||||
]),
|
||||
),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer),
|
||||
)
|
||||
|
||||
csl_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
csl_datasets = [
|
||||
dict(
|
||||
abbr="csl_dev",
|
||||
type=CslDataset_V2,
|
||||
path="./data/FewCLUE/csl/dev_few_all.json",
|
||||
reader_cfg=csl_reader_cfg,
|
||||
infer_cfg=csl_infer_cfg,
|
||||
eval_cfg=csl_eval_cfg,
|
||||
),
|
||||
dict(
|
||||
abbr="csl_test",
|
||||
type=CslDataset_V2,
|
||||
path="./data/FewCLUE/csl/test_public.json",
|
||||
reader_cfg=csl_reader_cfg,
|
||||
infer_cfg=csl_infer_cfg,
|
||||
eval_cfg=csl_eval_cfg,
|
||||
),
|
||||
]
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import CslDataset_V2
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
csl_reader_cfg = dict(
|
||||
input_columns=["abst", "keywords"],
|
||||
@ -27,7 +28,7 @@ csl_infer_cfg = dict(
|
||||
csl_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type="first-capital"),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
csl_datasets = [
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import eprstmtDataset_V2
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
eprstmt_reader_cfg = dict(
|
||||
input_columns=["sentence"], output_column="label", test_split="train")
|
||||
@ -25,7 +26,7 @@ eprstmt_infer_cfg = dict(
|
||||
eprstmt_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type="first-capital"),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
eprstmt_datasets = [
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import cmnliDataset_V2
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
ocnli_fc_reader_cfg = dict(
|
||||
input_columns=["sentence1", "sentence2"],
|
||||
@ -26,7 +27,7 @@ ocnli_fc_infer_cfg = dict(
|
||||
ocnli_fc_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type="first-capital"),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
ocnli_fc_datasets = [
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import TNewsDataset_V2
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
tnews_reader_cfg = dict(
|
||||
input_columns="sentence",
|
||||
@ -49,7 +50,7 @@ tnews_infer_cfg = dict(
|
||||
tnews_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type="first-capital"),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
tnews_datasets = [
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import AXDataset_V2
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
AX_b_reader_cfg = dict(
|
||||
input_columns=["sentence1", "sentence2"],
|
||||
@ -27,7 +28,7 @@ AX_b_infer_cfg = dict(
|
||||
AX_b_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type="first-capital"),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
AX_b_datasets = [
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import AXDataset_V2
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
AX_g_reader_cfg = dict(
|
||||
input_columns=["hypothesis", "premise"],
|
||||
@ -27,7 +28,7 @@ AX_g_infer_cfg = dict(
|
||||
AX_g_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type="first-capital"),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
AX_g_datasets = [
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import BoolQDataset_V2
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
BoolQ_reader_cfg = dict(
|
||||
input_columns=["question", "passage"],
|
||||
@ -25,7 +26,7 @@ BoolQ_infer_cfg = dict(
|
||||
BoolQ_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type="first-capital"),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
BoolQ_datasets = [
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import CBDataset_V2
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
CB_reader_cfg = dict(
|
||||
input_columns=["premise", "hypothesis"],
|
||||
@ -28,7 +29,7 @@ CB_infer_cfg = dict(
|
||||
CB_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type="first-capital"),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
CB_datasets = [
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import COPADataset_V2
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
COPA_reader_cfg = dict(
|
||||
input_columns=["question", "premise", "choice1", "choice2"],
|
||||
@ -28,7 +29,7 @@ COPA_infer_cfg = dict(
|
||||
COPA_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type="first-capital"),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
COPA_datasets = [
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import MultiRCDataset_V2
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
MultiRC_reader_cfg = dict(
|
||||
input_columns=["question", "text", "answer"],
|
||||
@ -27,7 +28,7 @@ MultiRC_infer_cfg = dict(
|
||||
MultiRC_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type="first-capital"),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
MultiRC_datasets = [
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import AXDataset_V2
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
RTE_reader_cfg = dict(
|
||||
input_columns=["hypothesis", "premise"],
|
||||
@ -27,7 +28,7 @@ RTE_infer_cfg = dict(
|
||||
RTE_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type="first-capital"),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
RTE_datasets = [
|
||||
|
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import EMEvaluator
|
||||
from opencompass.datasets import ReCoRDDataset
|
||||
from opencompass.datasets import ReCoRDDataset, ReCoRD_postprocess
|
||||
|
||||
ReCoRD_reader_cfg = dict(
|
||||
input_columns=['question', 'text'], output_column='answers')
|
||||
@ -16,7 +16,7 @@ ReCoRD_infer_cfg = dict(
|
||||
inferencer=dict(type=GenInferencer))
|
||||
|
||||
ReCoRD_eval_cfg = dict(
|
||||
evaluator=dict(type=EMEvaluator), pred_postprocessor=dict(type='ReCoRD'))
|
||||
evaluator=dict(type=EMEvaluator), pred_postprocessor=dict(type=ReCoRD_postprocess))
|
||||
|
||||
ReCoRD_datasets = [
|
||||
dict(
|
||||
|
@ -1,4 +1,4 @@
|
||||
from mmengine.config import read_base
|
||||
|
||||
with read_base():
|
||||
from .SuperGLUE_WSC_gen_6dc406 import WSC_datasets # noqa: F401, F403
|
||||
from .SuperGLUE_WSC_gen_8a881c import WSC_datasets # noqa: F401, F403
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import WSCDataset_V2
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
WSC_reader_cfg = dict(
|
||||
input_columns=["span1", "span2", "text"],
|
||||
@ -27,7 +28,7 @@ WSC_infer_cfg = dict(
|
||||
WSC_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type="first-capital"),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
WSC_datasets = [
|
||||
|
43
configs/datasets/SuperGLUE_WSC/SuperGLUE_WSC_gen_8a881c.py
Normal file
43
configs/datasets/SuperGLUE_WSC/SuperGLUE_WSC_gen_8a881c.py
Normal file
@ -0,0 +1,43 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import WSCDataset_V3
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
WSC_reader_cfg = dict(
|
||||
input_columns=["span1", "span2", "text"],
|
||||
output_column="label",
|
||||
)
|
||||
|
||||
WSC_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=dict(round=[
|
||||
dict(
|
||||
role="HUMAN",
|
||||
prompt=
|
||||
"Passage: {text}\nDoes the pronoun # {span2} # refer to * {span1} *?\nA. Yes\nB. No\nAnseer:"
|
||||
),
|
||||
]),
|
||||
),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer),
|
||||
)
|
||||
|
||||
WSC_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
WSC_datasets = [
|
||||
dict(
|
||||
abbr="WSC",
|
||||
type=WSCDataset_V3,
|
||||
path="./data/SuperGLUE/WSC/val.jsonl",
|
||||
reader_cfg=WSC_reader_cfg,
|
||||
infer_cfg=WSC_infer_cfg,
|
||||
eval_cfg=WSC_eval_cfg,
|
||||
)
|
||||
]
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import WiCDataset_V2
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
WiC_reader_cfg = dict(
|
||||
input_columns=[
|
||||
@ -31,7 +32,7 @@ WiC_infer_cfg = dict(
|
||||
WiC_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type="first-capital"),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
WiC_datasets = [
|
||||
|
@ -1,4 +1,4 @@
|
||||
from mmengine.config import read_base
|
||||
|
||||
with read_base():
|
||||
from .TheoremQA_gen_a27a10 import TheoremQA_datasets # noqa: F401, F403
|
||||
from .TheoremQA_gen_7009de import TheoremQA_datasets # noqa: F401, F403
|
||||
|
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import TheoremQADataset
|
||||
from opencompass.datasets import TheoremQADataset, TheoremQA_postprocess
|
||||
|
||||
TheoremQA_reader_cfg = dict(
|
||||
input_columns=['Question', 'Answer_type'],
|
||||
@ -23,11 +23,11 @@ TheoremQA_infer_cfg = dict(
|
||||
type=PromptTemplate,
|
||||
template=TheoremQA_prompt2),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer))
|
||||
inferencer=dict(type=GenInferencer, max_out_len=512))
|
||||
|
||||
TheoremQA_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_postprocessor=dict(type='TheoremQA'))
|
||||
pred_postprocessor=dict(type=TheoremQA_postprocess))
|
||||
|
||||
TheoremQA_datasets = [
|
||||
dict(
|
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import TheoremQADataset
|
||||
from opencompass.datasets import TheoremQADataset, TheoremQA_postprocess
|
||||
|
||||
TheoremQA_reader_cfg = dict(
|
||||
input_columns=['Question', 'Answer_type'],
|
||||
@ -31,11 +31,11 @@ TheoremQA_infer_cfg = dict(
|
||||
dict(role='HUMAN', prompt=TheoremQA_prompt2),
|
||||
])),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer))
|
||||
inferencer=dict(type=GenInferencer, max_out_len=512))
|
||||
|
||||
TheoremQA_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_postprocessor=dict(type='TheoremQA'))
|
||||
pred_postprocessor=dict(type=TheoremQA_postprocess))
|
||||
|
||||
TheoremQA_datasets = [
|
||||
dict(
|
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import TheoremQADataset
|
||||
from opencompass.datasets import TheoremQADataset, TheoremQA_postprocess
|
||||
|
||||
TheoremQA_reader_cfg = dict(
|
||||
input_columns=['Question', 'Answer_type'],
|
||||
@ -20,11 +20,11 @@ TheoremQA_infer_cfg = dict(
|
||||
),
|
||||
])),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer))
|
||||
inferencer=dict(type=GenInferencer, max_out_len=512))
|
||||
|
||||
TheoremQA_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_postprocessor=dict(type='TheoremQA'))
|
||||
pred_postprocessor=dict(type=TheoremQA_postprocess))
|
||||
|
||||
TheoremQA_datasets = [
|
||||
dict(
|
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import RougeEvaluator
|
||||
from opencompass.datasets import XLSUMDataset
|
||||
from opencompass.datasets import XLSUMDataset, Xsum_postprocess
|
||||
|
||||
XLSum_reader_cfg = dict(input_columns=['text'], output_column='summary')
|
||||
|
||||
@ -16,7 +16,7 @@ XLSum_infer_cfg = dict(
|
||||
|
||||
XLSum_eval_cfg = dict(
|
||||
evaluator=dict(type=RougeEvaluator),
|
||||
pred_postprocessor=dict(type='Xsum'),
|
||||
pred_postprocessor=dict(type=Xsum_postprocess),
|
||||
)
|
||||
|
||||
XLSum_datasets = [
|
||||
|
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import RougeEvaluator
|
||||
from opencompass.datasets import XsumDataset
|
||||
from opencompass.datasets import XsumDataset, Xsum_postprocess
|
||||
|
||||
Xsum_reader_cfg = dict(input_columns=['dialogue'], output_column='summary')
|
||||
|
||||
@ -16,7 +16,7 @@ Xsum_infer_cfg = dict(
|
||||
|
||||
Xsum_eval_cfg = dict(
|
||||
evaluator=dict(type=RougeEvaluator),
|
||||
pred_postprocessor=dict(type='Xsum'),
|
||||
pred_postprocessor=dict(type=Xsum_postprocess),
|
||||
)
|
||||
|
||||
Xsum_datasets = [
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import AGIEvalDataset, AGIEvalEvaluator
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
agieval_reader_cfg = dict(
|
||||
input_columns=['problem_input'], output_column='label')
|
||||
@ -44,7 +45,7 @@ for name in agieval_single_choice_sets:
|
||||
|
||||
agieval_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_postprocessor=dict(type='first-capital'))
|
||||
pred_postprocessor=dict(type=first_capital_postprocess))
|
||||
|
||||
agieval_datasets.append(
|
||||
dict(
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import AGIEvalDataset_v2, AGIEvalEvaluator
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess, first_capital_postprocess_multi
|
||||
|
||||
agieval_reader_cfg = dict(
|
||||
input_columns=['question', 'options'], output_column='label')
|
||||
@ -82,7 +83,7 @@ for _name in agieval_single_choice_sets:
|
||||
|
||||
agieval_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_postprocessor=dict(type='first-capital'))
|
||||
pred_postprocessor=dict(type=first_capital_postprocess))
|
||||
|
||||
agieval_datasets.append(
|
||||
dict(
|
||||
@ -111,7 +112,7 @@ for _name in agieval_multiple_choices_sets:
|
||||
|
||||
agieval_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_postprocessor=dict(type='first-capital-multi'))
|
||||
pred_postprocessor=dict(type=first_capital_postprocess_multi))
|
||||
|
||||
agieval_datasets.append(
|
||||
dict(
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import PPLInferencer, GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import AGIEvalDataset_v2, AGIEvalEvaluator
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess_multi
|
||||
|
||||
agieval_single_choice_sets = [
|
||||
'gaokao-chinese',
|
||||
@ -116,7 +117,7 @@ for _name in agieval_multiple_choices_sets:
|
||||
|
||||
agieval_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_postprocessor=dict(type='first-capital-multi'))
|
||||
pred_postprocessor=dict(type=first_capital_postprocess_multi))
|
||||
|
||||
agieval_datasets.append(
|
||||
dict(
|
||||
|
@ -1,7 +1,7 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import HFDataset, HumanEvaluator
|
||||
from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess
|
||||
|
||||
apps_reader_cfg = dict(
|
||||
input_columns=['question'], output_column='problem_id', train_split='test')
|
||||
@ -20,7 +20,7 @@ apps_eval_cfg = dict(
|
||||
evaluator=dict(type=HumanEvaluator),
|
||||
pred_role='BOT',
|
||||
k=[1, 10, 100], # the parameter only for humaneval
|
||||
pred_postprocessor=dict(type='humaneval'),
|
||||
pred_postprocessor=dict(type=humaneval_postprocess),
|
||||
)
|
||||
|
||||
apps_datasets = [
|
||||
|
@ -1,7 +1,7 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import HFDataset, HumanEvaluator
|
||||
from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess
|
||||
|
||||
apps_reader_cfg = dict(
|
||||
input_columns=['question'], output_column='problem_id', train_split='test')
|
||||
@ -27,7 +27,7 @@ apps_eval_cfg = dict(
|
||||
evaluator=dict(type=HumanEvaluator),
|
||||
pred_role='BOT',
|
||||
k=[1, 10, 100], # the parameter only for humaneval
|
||||
pred_postprocessor=dict(type='humaneval'),
|
||||
pred_postprocessor=dict(type=humaneval_postprocess),
|
||||
)
|
||||
|
||||
apps_datasets = [
|
||||
|
@ -1,7 +1,7 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import HFDataset, HumanEvaluator
|
||||
from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess
|
||||
|
||||
apps_reader_cfg = dict(
|
||||
input_columns=['question'], output_column='problem_id', train_split='test')
|
||||
@ -17,7 +17,7 @@ apps_infer_cfg = dict(
|
||||
apps_eval_cfg = dict(
|
||||
evaluator=dict(type=HumanEvaluator),
|
||||
k=[1, 10, 100],
|
||||
pred_postprocessor=dict(type='humaneval'),
|
||||
pred_postprocessor=dict(type=humaneval_postprocess),
|
||||
)
|
||||
|
||||
apps_datasets = [
|
||||
|
@ -1,4 +1,4 @@
|
||||
from mmengine.config import read_base
|
||||
|
||||
with read_base():
|
||||
from .bbh_gen_6bd693 import bbh_datasets # noqa: F401, F403
|
||||
from .bbh_gen_5b92b0 import bbh_datasets # noqa: F401, F403
|
||||
|
@ -3,7 +3,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import BBHDataset, BBHEvaluator
|
||||
from opencompass.datasets import BBHDataset, BBHEvaluator, bbh_mcq_postprocess
|
||||
|
||||
bbh_reader_cfg = dict(input_columns=["input"], output_column="target")
|
||||
|
||||
@ -61,8 +61,8 @@ for _name in bbh_multiple_choice_sets:
|
||||
bbh_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type='bbh-mcq'),
|
||||
dataset_postprocessor=dict(type='bbh-mcq'))
|
||||
pred_postprocessor=dict(type=bbh_mcq_postprocess),
|
||||
dataset_postprocessor=dict(type=bbh_mcq_postprocess))
|
||||
|
||||
bbh_datasets.append(
|
||||
dict(
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import FixKRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import CEvalDataset
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
ceval_subject_mapping = {
|
||||
"computer_network":
|
||||
@ -166,7 +167,7 @@ for _split in ["val", "test"]:
|
||||
|
||||
ceval_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_postprocessor=dict(type='first-capital'))
|
||||
pred_postprocessor=dict(type=first_capital_postprocess))
|
||||
|
||||
ceval_datasets.append(
|
||||
dict(
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import FixKRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import CEvalDataset
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
ceval_subject_mapping = {
|
||||
"computer_network":
|
||||
@ -164,7 +165,9 @@ for _split in ["val"]:
|
||||
inferencer=dict(type=GenInferencer, fix_id_list=[0, 1, 2, 3, 4]),
|
||||
)
|
||||
|
||||
ceval_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )
|
||||
ceval_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess))
|
||||
|
||||
ceval_datasets.append(
|
||||
dict(
|
||||
|
@ -5,18 +5,18 @@ with read_base():
|
||||
from ..ceval.ceval_ppl_578f8d import ceval_datasets
|
||||
from ..agieval.agieval_mixed_2f14ad import agieval_datasets
|
||||
from ..GaokaoBench.GaokaoBench_mixed_f2038e import GaokaoBench_datasets
|
||||
from ..bbh.bbh_gen_6bd693 import bbh_datasets
|
||||
from ..bbh.bbh_gen_5b92b0 import bbh_datasets
|
||||
from ..humaneval.humaneval_gen_8e312c import humaneval_datasets
|
||||
from ..mbpp.mbpp_gen_1e1056 import mbpp_datasets
|
||||
from ..CLUE_C3.CLUE_C3_ppl_e24a31 import C3_datasets
|
||||
from ..CLUE_CMRC.CLUE_CMRC_gen_941108 import CMRC_datasets
|
||||
from ..CLUE_DRCD.CLUE_DRCD_gen_941108 import DRCD_datasets
|
||||
from ..CLUE_CMRC.CLUE_CMRC_gen_1bd3c8 import CMRC_datasets
|
||||
from ..CLUE_DRCD.CLUE_DRCD_gen_1bd3c8 import DRCD_datasets
|
||||
from ..CLUE_afqmc.CLUE_afqmc_ppl_6507d7 import afqmc_datasets
|
||||
from ..CLUE_cmnli.CLUE_cmnli_ppl_fdc6de import cmnli_datasets
|
||||
from ..CLUE_ocnli.CLUE_ocnli_ppl_fdc6de import ocnli_datasets
|
||||
from ..FewCLUE_bustm.FewCLUE_bustm_ppl_e53034 import bustm_datasets
|
||||
from ..FewCLUE_chid.FewCLUE_chid_ppl_8f2872 import chid_datasets
|
||||
from ..FewCLUE_cluewsc.FewCLUE_cluewsc_ppl_868415 import cluewsc_datasets
|
||||
from ..FewCLUE_cluewsc.FewCLUE_cluewsc_ppl_4284a0 import cluewsc_datasets
|
||||
from ..FewCLUE_csl.FewCLUE_csl_ppl_841b62 import csl_datasets
|
||||
from ..FewCLUE_eprstmt.FewCLUE_eprstmt_ppl_f1e631 import eprstmt_datasets
|
||||
from ..FewCLUE_ocnli_fc.FewCLUE_ocnli_fc_ppl_c08300 import ocnli_fc_datasets
|
||||
@ -33,24 +33,24 @@ with read_base():
|
||||
from ..SuperGLUE_RTE.SuperGLUE_RTE_ppl_66caf3 import RTE_datasets
|
||||
from ..SuperGLUE_ReCoRD.SuperGLUE_ReCoRD_gen_30dea0 import ReCoRD_datasets
|
||||
from ..SuperGLUE_WiC.SuperGLUE_WiC_ppl_312de9 import WiC_datasets
|
||||
from ..SuperGLUE_WSC.SuperGLUE_WSC_ppl_d0f531 import WSC_datasets
|
||||
from ..race.race_ppl_ab8734 import race_datasets
|
||||
from ..SuperGLUE_WSC.SuperGLUE_WSC_ppl_003529 import WSC_datasets
|
||||
from ..race.race_ppl_a138cd import race_datasets
|
||||
from ..Xsum.Xsum_gen_31397e import Xsum_datasets
|
||||
from ..gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
|
||||
from ..summedits.summedits_ppl_1fbeb6 import summedits_datasets
|
||||
from ..math.math_gen_3e92f6 import math_datasets
|
||||
from ..TheoremQA.TheoremQA_gen_8acdf7 import TheoremQA_datasets
|
||||
from ..math.math_gen_265cce import math_datasets
|
||||
from ..TheoremQA.TheoremQA_gen_ef26ca import TheoremQA_datasets
|
||||
from ..hellaswag.hellaswag_ppl_47bff9 import hellaswag_datasets
|
||||
from ..ARC_e.ARC_e_ppl_a450bd import ARC_e_datasets
|
||||
from ..ARC_c.ARC_c_ppl_a450bd import ARC_c_datasets
|
||||
from ..commonsenseqa.commonsenseqa_ppl_5545e2 import commonsenseqa_datasets
|
||||
from ..piqa.piqa_ppl_1cf9f0 import piqa_datasets
|
||||
from ..siqa.siqa_ppl_ced5f6 import siqa_datasets
|
||||
from ..strategyqa.strategyqa_gen_b3ff20 import strategyqa_datasets
|
||||
from ..winogrande.winogrande_ppl_18e5de import winogrande_datasets
|
||||
from ..strategyqa.strategyqa_gen_1180a7 import strategyqa_datasets
|
||||
from ..winogrande.winogrande_ppl_55a66e import winogrande_datasets
|
||||
from ..obqa.obqa_ppl_c7c154 import obqa_datasets
|
||||
from ..nq.nq_gen_3dcea1 import nq_datasets
|
||||
from ..triviaqa.triviaqa_gen_3e39a5 import triviaqa_datasets
|
||||
from ..nq.nq_gen_c788f6 import nq_datasets
|
||||
from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
|
||||
from ..flores.flores_gen_806ede import flores_datasets
|
||||
from ..crowspairs.crowspairs_ppl_e811e1 import crowspairs_datasets
|
||||
|
||||
|
@ -2,9 +2,9 @@ from mmengine.config import read_base
|
||||
|
||||
with read_base():
|
||||
from ..ceval.ceval_ppl_578f8d import ceval_datasets
|
||||
from ..bbh.bbh_gen_6bd693 import bbh_datasets
|
||||
from ..CLUE_CMRC.CLUE_CMRC_gen_941108 import CMRC_datasets
|
||||
from ..CLUE_DRCD.CLUE_DRCD_gen_941108 import DRCD_datasets
|
||||
from ..bbh.bbh_gen_5b92b0 import bbh_datasets
|
||||
from ..CLUE_CMRC.CLUE_CMRC_gen_1bd3c8 import CMRC_datasets
|
||||
from ..CLUE_DRCD.CLUE_DRCD_gen_1bd3c8 import DRCD_datasets
|
||||
from ..CLUE_afqmc.CLUE_afqmc_ppl_6507d7 import afqmc_datasets
|
||||
from ..FewCLUE_bustm.FewCLUE_bustm_ppl_e53034 import bustm_datasets
|
||||
from ..FewCLUE_chid.FewCLUE_chid_ppl_8f2872 import chid_datasets
|
||||
@ -24,16 +24,16 @@ with read_base():
|
||||
from ..SuperGLUE_ReCoRD.SuperGLUE_ReCoRD_gen_30dea0 import ReCoRD_datasets
|
||||
from ..SuperGLUE_WiC.SuperGLUE_WiC_ppl_312de9 import WiC_datasets
|
||||
from ..SuperGLUE_WSC.SuperGLUE_WSC_ppl_d0f531 import WSC_datasets
|
||||
from ..race.race_ppl_ab8734 import race_datasets
|
||||
from ..math.math_gen_3e92f6 import math_datasets
|
||||
from ..race.race_ppl_a138cd import race_datasets
|
||||
from ..math.math_gen_265cce import math_datasets
|
||||
from ..gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
|
||||
from ..summedits.summedits_ppl_1fbeb6 import summedits_datasets
|
||||
from ..hellaswag.hellaswag_ppl_47bff9 import hellaswag_datasets
|
||||
from ..piqa.piqa_ppl_1cf9f0 import piqa_datasets
|
||||
from ..winogrande.winogrande_ppl_18e5de import winogrande_datasets
|
||||
from ..winogrande.winogrande_ppl_55a66e import winogrande_datasets
|
||||
from ..obqa.obqa_ppl_c7c154 import obqa_datasets
|
||||
from ..nq.nq_gen_3dcea1 import nq_datasets
|
||||
from ..triviaqa.triviaqa_gen_3e39a5 import triviaqa_datasets
|
||||
from ..nq.nq_gen_c788f6 import nq_datasets
|
||||
from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
|
||||
from ..crowspairs.crowspairs_ppl_e811e1 import crowspairs_datasets
|
||||
|
||||
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
||||
|
@ -5,19 +5,19 @@ with read_base():
|
||||
from ..ceval.ceval_gen_5f30c7 import ceval_datasets
|
||||
from ..agieval.agieval_gen_397d81 import agieval_datasets
|
||||
from ..GaokaoBench.GaokaoBench_gen_5cfe9e import GaokaoBench_datasets
|
||||
from ..bbh.bbh_gen_6bd693 import bbh_datasets
|
||||
from ..bbh.bbh_gen_5b92b0 import bbh_datasets
|
||||
from ..humaneval.humaneval_gen_8e312c import humaneval_datasets
|
||||
from ..mbpp.mbpp_gen_1e1056 import mbpp_datasets
|
||||
from ..CLUE_C3.CLUE_C3_gen_8c358f import C3_datasets
|
||||
from ..CLUE_CMRC.CLUE_CMRC_gen_941108 import CMRC_datasets
|
||||
from ..CLUE_DRCD.CLUE_DRCD_gen_941108 import DRCD_datasets
|
||||
from ..CLUE_CMRC.CLUE_CMRC_gen_1bd3c8 import CMRC_datasets
|
||||
from ..CLUE_DRCD.CLUE_DRCD_gen_1bd3c8 import DRCD_datasets
|
||||
from ..CLUE_afqmc.CLUE_afqmc_gen_901306 import afqmc_datasets
|
||||
from ..CLUE_cmnli.CLUE_cmnli_gen_1abf97 import cmnli_datasets
|
||||
from ..CLUE_ocnli.CLUE_ocnli_gen_c4cb6c import ocnli_datasets
|
||||
from ..FewCLUE_bustm.FewCLUE_bustm_gen_634f41 import bustm_datasets
|
||||
from ..FewCLUE_chid.FewCLUE_chid_gen_0a29a2 import chid_datasets
|
||||
from ..FewCLUE_cluewsc.FewCLUE_cluewsc_gen_c68933 import cluewsc_datasets
|
||||
from ..FewCLUE_csl.FewCLUE_csl_gen_87f4a8 import csl_datasets
|
||||
from ..FewCLUE_csl.FewCLUE_csl_gen_28b223 import csl_datasets
|
||||
from ..FewCLUE_eprstmt.FewCLUE_eprstmt_gen_740ea0 import eprstmt_datasets
|
||||
from ..FewCLUE_ocnli_fc.FewCLUE_ocnli_fc_gen_f97a97 import ocnli_fc_datasets
|
||||
from ..FewCLUE_tnews.FewCLUE_tnews_gen_b90e4a import tnews_datasets
|
||||
@ -37,20 +37,20 @@ with read_base():
|
||||
from ..race.race_gen_69ee4f import race_datasets
|
||||
from ..Xsum.Xsum_gen_31397e import Xsum_datasets
|
||||
from ..gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
|
||||
from ..summedits.summedits_gen_4fb38b import summedits_datasets
|
||||
from ..math.math_gen_3e92f6 import math_datasets
|
||||
from ..TheoremQA.TheoremQA_gen_a27a10 import TheoremQA_datasets
|
||||
from ..summedits.summedits_gen_315438 import summedits_datasets
|
||||
from ..math.math_gen_265cce import math_datasets
|
||||
from ..TheoremQA.TheoremQA_gen_7009de import TheoremQA_datasets
|
||||
from ..hellaswag.hellaswag_gen_6faab5 import hellaswag_datasets
|
||||
from ..ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
|
||||
from ..ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
|
||||
from ..commonsenseqa.commonsenseqa_gen_c946f2 import commonsenseqa_datasets
|
||||
from ..piqa.piqa_gen_1194eb import piqa_datasets
|
||||
from ..siqa.siqa_gen_e78df3 import siqa_datasets
|
||||
from ..strategyqa.strategyqa_gen_b3ff20 import strategyqa_datasets
|
||||
from ..strategyqa.strategyqa_gen_1180a7 import strategyqa_datasets
|
||||
from ..winogrande.winogrande_gen_a9ede5 import winogrande_datasets
|
||||
from ..obqa.obqa_gen_9069e4 import obqa_datasets
|
||||
from ..nq.nq_gen_68c1c6 import nq_datasets
|
||||
from ..triviaqa.triviaqa_gen_3e39a5 import triviaqa_datasets
|
||||
from ..nq.nq_gen_c788f6 import nq_datasets
|
||||
from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
|
||||
from ..flores.flores_gen_806ede import flores_datasets
|
||||
from ..crowspairs.crowspairs_gen_02b6c1 import crowspairs_datasets
|
||||
|
||||
|
@ -3,9 +3,9 @@ from mmengine.config import read_base
|
||||
with read_base():
|
||||
from ..mmlu.mmlu_gen_a484b3 import mmlu_datasets
|
||||
from ..ceval.ceval_gen_5f30c7 import ceval_datasets
|
||||
from ..bbh.bbh_gen_6bd693 import bbh_datasets
|
||||
from ..CLUE_CMRC.CLUE_CMRC_gen_941108 import CMRC_datasets
|
||||
from ..CLUE_DRCD.CLUE_DRCD_gen_941108 import DRCD_datasets
|
||||
from ..bbh.bbh_gen_5b92b0 import bbh_datasets
|
||||
from ..CLUE_CMRC.CLUE_CMRC_gen_1bd3c8 import CMRC_datasets
|
||||
from ..CLUE_DRCD.CLUE_DRCD_gen_1bd3c8 import DRCD_datasets
|
||||
from ..CLUE_afqmc.CLUE_afqmc_gen_901306 import afqmc_datasets
|
||||
from ..FewCLUE_bustm.FewCLUE_bustm_gen_634f41 import bustm_datasets
|
||||
from ..FewCLUE_chid.FewCLUE_chid_gen_0a29a2 import chid_datasets
|
||||
@ -24,17 +24,17 @@ with read_base():
|
||||
from ..SuperGLUE_RTE.SuperGLUE_RTE_gen_68aac7 import RTE_datasets
|
||||
from ..SuperGLUE_ReCoRD.SuperGLUE_ReCoRD_gen_30dea0 import ReCoRD_datasets
|
||||
from ..SuperGLUE_WiC.SuperGLUE_WiC_gen_d06864 import WiC_datasets
|
||||
from ..SuperGLUE_WSC.SuperGLUE_WSC_gen_6dc406 import WSC_datasets
|
||||
from ..SuperGLUE_WSC.SuperGLUE_WSC_gen_8a881c import WSC_datasets
|
||||
from ..race.race_gen_69ee4f import race_datasets
|
||||
from ..math.math_gen_3e92f6 import math_datasets
|
||||
from ..math.math_gen_265cce import math_datasets
|
||||
from ..gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
|
||||
from ..summedits.summedits_gen_4fb38b import summedits_datasets
|
||||
from ..summedits.summedits_gen_315438 import summedits_datasets
|
||||
from ..hellaswag.hellaswag_gen_6faab5 import hellaswag_datasets
|
||||
from ..piqa.piqa_gen_1194eb import piqa_datasets
|
||||
from ..winogrande.winogrande_gen_a9ede5 import winogrande_datasets
|
||||
from ..obqa.obqa_gen_9069e4 import obqa_datasets
|
||||
from ..nq.nq_gen_68c1c6 import nq_datasets
|
||||
from ..triviaqa.triviaqa_gen_3e39a5 import triviaqa_datasets
|
||||
from ..nq.nq_gen_c788f6 import nq_datasets
|
||||
from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
|
||||
from ..crowspairs.crowspairs_gen_02b6c1 import crowspairs_datasets
|
||||
|
||||
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
||||
|
@ -2,6 +2,6 @@ from mmengine.config import read_base
|
||||
|
||||
with read_base():
|
||||
from ..piqa.piqa_gen_1194eb import piqa_datasets
|
||||
from ..nq.nq_gen_68c1c6 import nq_datasets
|
||||
from ..nq.nq_gen_c788f6 import nq_datasets
|
||||
|
||||
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import MDLRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import commonsenseqaDataset
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
commonsenseqa_reader_cfg = dict(
|
||||
input_columns=["question", "A", "B", "C", "D", "E"],
|
||||
@ -44,7 +45,7 @@ commonsenseqa_infer_cfg = dict(
|
||||
|
||||
commonsenseqa_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_postprocessor=dict(type="first-capital"),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
commonsenseqa_datasets = [
|
||||
|
@ -3,10 +3,11 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import crowspairsDataset_V2
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
crowspairs_reader_cfg = dict(
|
||||
input_columns=['sent_more', 'sent_less'],
|
||||
output_column='id',
|
||||
output_column='label',
|
||||
train_split='test',
|
||||
test_split='test')
|
||||
|
||||
@ -26,7 +27,7 @@ crowspairs_infer_cfg = dict(
|
||||
crowspairs_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type="first-capital"),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
crowspairs_datasets = [
|
||||
|
@ -6,7 +6,7 @@ from opencompass.datasets import crowspairsDataset
|
||||
|
||||
crowspairs_reader_cfg = dict(
|
||||
input_columns=['sent_more', 'sent_less'],
|
||||
output_column='id',
|
||||
output_column='label',
|
||||
train_split='test',
|
||||
test_split='test')
|
||||
|
||||
|
@ -6,7 +6,7 @@ from opencompass.datasets import crowspairsDataset
|
||||
|
||||
crowspairs_reader_cfg = dict(
|
||||
input_columns=['sent_more', 'sent_less'],
|
||||
output_column='id',
|
||||
output_column='label',
|
||||
train_split='test',
|
||||
test_split='test')
|
||||
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import BleuEvaluator
|
||||
from opencompass.datasets import GovRepcrsDataset
|
||||
from opencompass.utils.text_postprocessors import general_cn_postprocess
|
||||
|
||||
govrepcrs_reader_cfg = dict(
|
||||
input_columns='content',
|
||||
@ -21,8 +22,8 @@ govrepcrs_infer_cfg = dict(
|
||||
|
||||
govrepcrs_eval_cfg = dict(
|
||||
evaluator=dict(type=BleuEvaluator),
|
||||
pred_postprocessor=dict(type='general_cn'),
|
||||
dataset_postprocessor=dict(type='general_cn'))
|
||||
pred_postprocessor=dict(type=general_cn_postprocess),
|
||||
dataset_postprocessor=dict(type=general_cn_postprocess))
|
||||
|
||||
govrepcrs_datasets = [
|
||||
dict(
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import BleuEvaluator
|
||||
from opencompass.datasets import GovRepcrsDataset
|
||||
from opencompass.utils.text_postprocessors import general_cn_postprocess
|
||||
|
||||
govrepcrs_reader_cfg = dict(
|
||||
input_columns='content',
|
||||
@ -33,8 +34,8 @@ govrepcrs_infer_cfg = dict(
|
||||
govrepcrs_eval_cfg = dict(
|
||||
evaluator=dict(type=BleuEvaluator),
|
||||
pred_role='BOT',
|
||||
pred_postprocessor=dict(type='general_cn'),
|
||||
dataset_postprocessor=dict(type='general_cn'))
|
||||
pred_postprocessor=dict(type=general_cn_postprocess),
|
||||
dataset_postprocessor=dict(type=general_cn_postprocess))
|
||||
|
||||
govrepcrs_datasets = [
|
||||
dict(
|
||||
|
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import HFDataset
|
||||
from opencompass.datasets import HFDataset, gsm8k_postprocess, gsm8k_dataset_postprocess
|
||||
|
||||
gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')
|
||||
|
||||
@ -26,8 +26,8 @@ gsm8k_infer_cfg = dict(
|
||||
inferencer=dict(type=GenInferencer, max_out_len=512))
|
||||
|
||||
gsm8k_eval_cfg = dict(evaluator=dict(type=AccEvaluator),
|
||||
pred_postprocessor=dict(type='gsm8k'),
|
||||
dataset_postprocessor=dict(type='gsm8k_dataset'))
|
||||
pred_postprocessor=dict(type=gsm8k_postprocess),
|
||||
dataset_postprocessor=dict(type=gsm8k_dataset_postprocess))
|
||||
|
||||
gsm8k_datasets = [
|
||||
dict(
|
||||
|
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import HFDataset
|
||||
from opencompass.datasets import HFDataset, gsm8k_postprocess, gsm8k_dataset_postprocess
|
||||
|
||||
gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')
|
||||
|
||||
@ -73,8 +73,8 @@ Question: {question}{answer}
|
||||
|
||||
gsm8k_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_postprocessor=dict(type='gsm8k'),
|
||||
dataset_postprocessor=dict(type='gsm8k_dataset'))
|
||||
pred_postprocessor=dict(type=gsm8k_postprocess),
|
||||
dataset_postprocessor=dict(type=gsm8k_dataset_postprocess))
|
||||
|
||||
gsm8k_datasets = [
|
||||
dict(
|
||||
|
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import HFDataset
|
||||
from opencompass.datasets import HFDataset, gsm8k_postprocess, gsm8k_dataset_postprocess
|
||||
|
||||
gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')
|
||||
|
||||
@ -36,8 +36,8 @@ gsm8k_infer_cfg = dict(
|
||||
|
||||
gsm8k_eval_cfg = dict(evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type='gsm8k'),
|
||||
dataset_postprocessor=dict(type='gsm8k_dataset'))
|
||||
pred_postprocessor=dict(type=gsm8k_postprocess),
|
||||
dataset_postprocessor=dict(type=gsm8k_dataset_postprocess))
|
||||
|
||||
gsm8k_datasets = [
|
||||
dict(
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import hellaswagDataset_V2
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
hellaswag_reader_cfg = dict(
|
||||
input_columns=["ctx", "A", "B", "C", "D"],
|
||||
@ -30,7 +31,7 @@ hellaswag_infer_cfg = dict(
|
||||
hellaswag_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type="first-capital"),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
hellaswag_datasets = [
|
||||
|
@ -1,7 +1,7 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import HFDataset, HumanEvaluator
|
||||
from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess
|
||||
|
||||
humaneval_reader_cfg = dict(
|
||||
input_columns=['prompt'], output_column='task_id', train_split='test')
|
||||
@ -27,7 +27,7 @@ humaneval_eval_cfg = dict(
|
||||
evaluator=dict(type=HumanEvaluator),
|
||||
pred_role='BOT',
|
||||
k=[1, 10, 100], # the parameter only for humaneval
|
||||
pred_postprocessor=dict(type='humaneval'),
|
||||
pred_postprocessor=dict(type=humaneval_postprocess),
|
||||
)
|
||||
|
||||
humaneval_datasets = [
|
||||
|
@ -1,7 +1,7 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import HFDataset, HumanEvaluator
|
||||
from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess
|
||||
|
||||
humaneval_reader_cfg = dict(
|
||||
input_columns=['prompt'], output_column='task_id', train_split='test')
|
||||
@ -22,7 +22,7 @@ humaneval_eval_cfg = dict(
|
||||
evaluator=dict(type=HumanEvaluator),
|
||||
pred_role='BOT',
|
||||
k=[1, 10, 100], # the parameter only for humaneval
|
||||
pred_postprocessor=dict(type='humaneval'),
|
||||
pred_postprocessor=dict(type=humaneval_postprocess),
|
||||
)
|
||||
|
||||
humaneval_datasets = [
|
||||
|
@ -1,7 +1,7 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import HFDataset, HumanEvaluator
|
||||
from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess
|
||||
|
||||
humaneval_reader_cfg = dict(
|
||||
input_columns=['prompt'], output_column='task_id', train_split='test')
|
||||
@ -17,7 +17,7 @@ humaneval_infer_cfg = dict(
|
||||
humaneval_eval_cfg = dict(
|
||||
evaluator=dict(type=HumanEvaluator),
|
||||
k=[1, 10, 100], # the parameter only for humaneval
|
||||
pred_postprocessor=dict(type='humaneval'),
|
||||
pred_postprocessor=dict(type=humaneval_postprocess),
|
||||
)
|
||||
|
||||
humaneval_datasets = [
|
||||
|
@ -1,7 +1,7 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import HFDataset, HumanEvaluator
|
||||
from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess
|
||||
|
||||
humaneval_reader_cfg = dict(
|
||||
input_columns=['prompt'], output_column='task_id', train_split='test')
|
||||
@ -27,7 +27,7 @@ humaneval_eval_cfg = dict(
|
||||
evaluator=dict(type=HumanEvaluator),
|
||||
pred_role='BOT',
|
||||
k=[1, 10, 100], # the parameter only for humaneval
|
||||
pred_postprocessor=dict(type='humaneval'),
|
||||
pred_postprocessor=dict(type=humaneval_postprocess),
|
||||
)
|
||||
|
||||
humaneval_datasets = [
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import BM25Retriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import BleuEvaluator
|
||||
from opencompass.datasets import IWSLT2017Dataset
|
||||
from opencompass.utils.text_postprocessors import general_cn_postprocess
|
||||
|
||||
iwslt2017_reader_cfg = dict(
|
||||
input_columns='en', output_column='de', train_split='validation')
|
||||
@ -15,10 +16,10 @@ iwslt2017_infer_cfg = dict(
|
||||
inferencer=dict(type=GenInferencer))
|
||||
|
||||
iwslt2017_eval_cfg = dict(
|
||||
evaluator=dict(type=BleuEvaluator),
|
||||
pred_role='BOT',
|
||||
pred_postprocessor=dict(type='general_cn'),
|
||||
dataset_postprocessor=dict(type='general_cn'))
|
||||
evaluator=dict(type=BleuEvaluator),
|
||||
pred_role='BOT',
|
||||
pred_postprocessor=dict(type=general_cn_postprocess),
|
||||
dataset_postprocessor=dict(type=general_cn_postprocess))
|
||||
|
||||
iwslt2017_datasets = [
|
||||
dict(
|
||||
@ -28,4 +29,4 @@ iwslt2017_datasets = [
|
||||
reader_cfg=iwslt2017_reader_cfg,
|
||||
infer_cfg=iwslt2017_infer_cfg,
|
||||
eval_cfg=iwslt2017_eval_cfg)
|
||||
]
|
||||
]
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import BM25Retriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import BleuEvaluator
|
||||
from opencompass.datasets import IWSLT2017Dataset
|
||||
from opencompass.utils.text_postprocessors import general_cn_postprocess
|
||||
|
||||
iwslt2017_reader_cfg = dict(
|
||||
input_columns='en', output_column='de', train_split='validation')
|
||||
@ -24,10 +25,10 @@ iwslt2017_infer_cfg = dict(
|
||||
inferencer=dict(type=GenInferencer))
|
||||
|
||||
iwslt2017_eval_cfg = dict(
|
||||
evaluator=dict(type=BleuEvaluator),
|
||||
pred_role='BOT',
|
||||
pred_postprocessor=dict(type='general_cn'),
|
||||
dataset_postprocessor=dict(type='general_cn'))
|
||||
evaluator=dict(type=BleuEvaluator),
|
||||
pred_role='BOT',
|
||||
pred_postprocessor=dict(type=general_cn_postprocess),
|
||||
dataset_postprocessor=dict(type=general_cn_postprocess))
|
||||
|
||||
iwslt2017_datasets = [
|
||||
dict(
|
||||
@ -37,4 +38,4 @@ iwslt2017_datasets = [
|
||||
reader_cfg=iwslt2017_reader_cfg,
|
||||
infer_cfg=iwslt2017_infer_cfg,
|
||||
eval_cfg=iwslt2017_eval_cfg)
|
||||
]
|
||||
]
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import BM25Retriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import BleuEvaluator
|
||||
from opencompass.datasets import IWSLT2017Dataset
|
||||
from opencompass.utils.text_postprocessors import general_cn_postprocess
|
||||
|
||||
iwslt2017_reader_cfg = dict(
|
||||
input_columns='en', output_column='de', train_split='validation')
|
||||
@ -22,10 +23,10 @@ iwslt2017_infer_cfg = dict(
|
||||
inferencer=dict(type=GenInferencer))
|
||||
|
||||
iwslt2017_eval_cfg = dict(
|
||||
evaluator=dict(type=BleuEvaluator),
|
||||
pred_role='BOT',
|
||||
pred_postprocessor=dict(type='general_cn'),
|
||||
dataset_postprocessor=dict(type='general_cn'))
|
||||
evaluator=dict(type=BleuEvaluator),
|
||||
pred_role='BOT',
|
||||
pred_postprocessor=dict(type=general_cn_postprocess),
|
||||
dataset_postprocessor=dict(type=general_cn_postprocess))
|
||||
|
||||
iwslt2017_datasets = [
|
||||
dict(
|
||||
@ -35,4 +36,4 @@ iwslt2017_datasets = [
|
||||
reader_cfg=iwslt2017_reader_cfg,
|
||||
infer_cfg=iwslt2017_infer_cfg,
|
||||
eval_cfg=iwslt2017_eval_cfg)
|
||||
]
|
||||
]
|
||||
|
@ -33,8 +33,8 @@ for _l in lang:
|
||||
dict(
|
||||
abbr=f'jigsaw_multilingual_{_l}',
|
||||
type=JigsawMultilingualDataset,
|
||||
path='data/test.csv',
|
||||
label='data/test_labels.csv',
|
||||
path='data/jigsawmultilingual/test.csv',
|
||||
label='data/jigsawmultilingual/test_labels.csv',
|
||||
lang=_l,
|
||||
reader_cfg=jigsawmultilingual_reader_cfg,
|
||||
infer_cfg=jigsawmultilingual_infer_cfg,
|
||||
|
@ -37,8 +37,8 @@ for _l in lang:
|
||||
dict(
|
||||
abbr=f'jigsaw_multilingual_{_l}',
|
||||
type=JigsawMultilingualDataset,
|
||||
path='data/test.csv',
|
||||
label='data/test_labels.csv',
|
||||
path='data/jigsawmultilingual/test.csv',
|
||||
label='data/jigsawmultilingual/test_labels.csv',
|
||||
lang=_l,
|
||||
reader_cfg=jigsawmultilingual_reader_cfg,
|
||||
infer_cfg=jigsawmultilingual_infer_cfg,
|
||||
|
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import RougeEvaluator
|
||||
from opencompass.datasets import LCSTSDataset
|
||||
from opencompass.datasets import LCSTSDataset, lcsts_postprocess
|
||||
|
||||
lcsts_reader_cfg = dict(input_columns=['content'], output_column='abst')
|
||||
|
||||
@ -18,7 +18,7 @@ lcsts_infer_cfg = dict(
|
||||
lcsts_eval_cfg = dict(
|
||||
evaluator=dict(type=RougeEvaluator),
|
||||
pred_role='BOT',
|
||||
pred_postprocessor=dict(type='lcsts'),
|
||||
pred_postprocessor=dict(type=lcsts_postprocess),
|
||||
)
|
||||
|
||||
lcsts_datasets = [
|
||||
|
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import RougeEvaluator
|
||||
from opencompass.datasets import LCSTSDataset
|
||||
from opencompass.datasets import LCSTSDataset, lcsts_postprocess
|
||||
|
||||
lcsts_reader_cfg = dict(input_columns=['content'], output_column='abst')
|
||||
|
||||
@ -14,7 +14,7 @@ lcsts_infer_cfg = dict(
|
||||
|
||||
lcsts_eval_cfg = dict(
|
||||
evaluator=dict(type=RougeEvaluator),
|
||||
pred_postprocessor=dict(type='lcsts'),
|
||||
pred_postprocessor=dict(type=lcsts_postprocess),
|
||||
)
|
||||
|
||||
lcsts_datasets = [
|
||||
|
@ -1,4 +1,4 @@
|
||||
from mmengine.config import read_base
|
||||
|
||||
with read_base():
|
||||
from .math_gen_3e92f6 import math_datasets # noqa: F401, F403
|
||||
from .math_gen_265cce import math_datasets # noqa: F401, F403
|
||||
|
@ -1,7 +1,7 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import MATHDataset, MATHEvaluator
|
||||
from opencompass.datasets import MATHDataset, MATHEvaluator, math_postprocess
|
||||
|
||||
math_reader_cfg = dict(input_columns=['problem'], output_column='solution')
|
||||
|
||||
@ -12,12 +12,12 @@ math_infer_cfg = dict(
|
||||
dict(
|
||||
role="HUMAN",
|
||||
prompt=
|
||||
"Problem:\nFind the domain of the expression $\frac{{\sqrt{{x-2}}}}{{\sqrt{{5-x}}}}$.}}\nSolution:"
|
||||
"Problem:\nFind the domain of the expression $\\frac{{\sqrt{{x-2}}}}{{\sqrt{{5-x}}}}$.}}\nSolution:"
|
||||
),
|
||||
dict(
|
||||
role="BOT",
|
||||
prompt=
|
||||
"The expressions inside each square root must be non-negative. Therefore, $x-2 \ge 0$, so $x\ge2$, and $5 - x \ge 0$, so $x \le 5$. Also, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of the expression is $\boxed{{[2,5)}}$.\nFinal Answer: The final answer is $[2,5)$. I hope it is correct."
|
||||
"The expressions inside each square root must be non-negative. Therefore, $x-2 \ge 0$, so $x\ge2$, and $5 - x \ge 0$, so $x \le 5$. Also, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of the expression is $\\boxed{{[2,5)}}$.\nFinal Answer: The final answer is $[2,5)$. I hope it is correct.\n"
|
||||
),
|
||||
dict(
|
||||
role="HUMAN",
|
||||
@ -27,7 +27,7 @@ math_infer_cfg = dict(
|
||||
dict(
|
||||
role="BOT",
|
||||
prompt=
|
||||
"We have that $\det (\mathbf{{A}} \mathbf{{B}}) = (\det \mathbf{{A}})(\det \mathbf{{B}}) = (2)(12) = \boxed{{24}}.$\nFinal Answer: The final answer is $24$. I hope it is correct."
|
||||
"We have that $\det (\mathbf{{A}} \mathbf{{B}}) = (\det \mathbf{{A}})(\det \mathbf{{B}}) = (2)(12) = \\boxed{{24}}.$\nFinal Answer: The final answer is $24$. I hope it is correct.\n"
|
||||
),
|
||||
dict(
|
||||
role="HUMAN",
|
||||
@ -37,17 +37,17 @@ math_infer_cfg = dict(
|
||||
dict(
|
||||
role="BOT",
|
||||
prompt=
|
||||
"If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\cdot 12\cdot20=480$ pounds of weight. If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\cdot15\cdot n=30n$ pounds of weight. Equating this to 480 pounds, we can solve for $n$: \begin{{align*}} 30n&=480\\ \Rightarrow\qquad n&=480/30=\boxed{{16}} \end{{align*}}\nFinal Answer: The final answer is $16$. I hope it is correct."
|
||||
"If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\cdot 12\cdot20=480$ pounds of weight. If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\cdot15\cdot n=30n$ pounds of weight. Equating this to 480 pounds, we can solve for $n$: \\begin{{align*}} 30n&=480\\\\ \Rightarrow\qquad n&=480/30=\\boxed{{16}} \end{{align*}}\nFinal Answer: The final answer is $16$. I hope it is correct.\n"
|
||||
),
|
||||
dict(
|
||||
role="HUMAN",
|
||||
prompt=
|
||||
"Problem:\nIf the system of equations: \begin{{align*}} 6x-4y&=a,\\ 6y-9x &=b. \end{{align*}}has a solution $(x, y)$ where $x$ and $y$ are both nonzero, find $\frac{{a}}{{b}},$ assuming $b$ is nonzero.\nSolution:"
|
||||
"Problem:\nIf the system of equations: \\begin{{align*}} 6x-4y&=a,\\\\ 6y-9x &=b. \end{{align*}}has a solution $(x, y)$ where $x$ and $y$ are both nonzero, find $\\frac{{a}}{{b}},$ assuming $b$ is nonzero.\nSolution:"
|
||||
),
|
||||
dict(
|
||||
role="BOT",
|
||||
prompt=
|
||||
"If we multiply the first equation by $-\frac{{3}}{{2}}$, we obtain $$6y-9x=-\frac{{3}}{{2}}a.$$Since we also know that $6y-9x=b$, we have $$-\frac{{3}}{{2}}a=b\Rightarrow\frac{{a}}{{b}}=\boxed{{-\frac{{2}}{{3}}}}.$$\nFinal Answer: The final answer is $-\frac{{2}}{{3}}$. I hope it is correct."
|
||||
"If we multiply the first equation by $-\\frac{{3}}{{2}}$, we obtain $$6y-9x=-\\frac{{3}}{{2}}a.$$Since we also know that $6y-9x=b$, we have $$-\\frac{{3}}{{2}}a=b\Rightarrow\\frac{{a}}{{b}}=\\boxed{{-\\frac{{2}}{{3}}}}.$$\nFinal Answer: The final answer is $-\\frac{{2}}{{3}}$. I hope it is correct.\n"
|
||||
),
|
||||
dict(role="HUMAN", prompt="Problem:\n{problem}\nSolution:\n"),
|
||||
])),
|
||||
@ -55,7 +55,7 @@ math_infer_cfg = dict(
|
||||
inferencer=dict(type=GenInferencer, max_out_len=512))
|
||||
|
||||
math_eval_cfg = dict(
|
||||
evaluator=dict(type=MATHEvaluator), pred_postprocessor=dict(type='math'))
|
||||
evaluator=dict(type=MATHEvaluator), pred_postprocessor=dict(type=math_postprocess))
|
||||
|
||||
math_datasets = [
|
||||
dict(
|
@ -1,7 +1,7 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import MATHDataset, MATHEvaluator
|
||||
from opencompass.datasets import MATHDataset, MATHEvaluator, math_postprocess
|
||||
|
||||
math_reader_cfg = dict(input_columns=['problem'], output_column='solution')
|
||||
|
||||
@ -9,28 +9,28 @@ math_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
type=PromptTemplate,
|
||||
template='''Problem:
|
||||
Find the domain of the expression $\frac{{\sqrt{{x-2}}}}{{\sqrt{{5-x}}}}$.}}
|
||||
Find the domain of the expression $\\frac{{\sqrt{{x-2}}}}{{\sqrt{{5-x}}}}$.}}
|
||||
Solution:
|
||||
The expressions inside each square root must be non-negative. Therefore, $x-2 \ge 0$, so $x\ge2$, and $5 - x \ge 0$, so $x \le 5$. Also, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of the expression is $\boxed{{[2,5)}}$.
|
||||
The expressions inside each square root must be non-negative. Therefore, $x-2 \ge 0$, so $x\ge2$, and $5 - x \ge 0$, so $x \le 5$. Also, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of the expression is $\\boxed{{[2,5)}}$.
|
||||
Final Answer: The final answer is $[2,5)$. I hope it is correct.
|
||||
|
||||
Problem:
|
||||
If $\det \mathbf{{A}} = 2$ and $\det \mathbf{{B}} = 12,$ then find $\det (\mathbf{{A}} \mathbf{{B}}).$
|
||||
Solution:
|
||||
We have that $\det (\mathbf{{A}} \mathbf{{B}}) = (\det \mathbf{{A}})(\det \mathbf{{B}}) = (2)(12) = \boxed{{24}}.$
|
||||
We have that $\det (\mathbf{{A}} \mathbf{{B}}) = (\det \mathbf{{A}})(\det \mathbf{{B}}) = (2)(12) = \\boxed{{24}}.$
|
||||
Final Answer: The final answer is $24$. I hope it is correct.
|
||||
|
||||
Problem:
|
||||
Terrell usually lifts two 20-pound weights 12 times. If he uses two 15-pound weights instead, how many times must Terrell lift them in order to lift the same total weight?
|
||||
Solution:
|
||||
If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\cdot 12\cdot20=480$ pounds of weight. If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\cdot15\cdot n=30n$ pounds of weight. Equating this to 480 pounds, we can solve for $n$: \begin{{align*}} 30n&=480\\ \Rightarrow\qquad n&=480/30=\boxed{{16}} \end{{align*}}
|
||||
If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\cdot 12\cdot20=480$ pounds of weight. If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\cdot15\cdot n=30n$ pounds of weight. Equating this to 480 pounds, we can solve for $n$: \\begin{{align*}} 30n&=480\\\\ \Rightarrow\qquad n&=480/30=\\boxed{{16}} \end{{align*}}
|
||||
Final Answer: The final answer is $16$. I hope it is correct.
|
||||
|
||||
Problem:
|
||||
If the system of equations: \begin{{align*}} 6x-4y&=a,\\ 6y-9x &=b. \end{{align*}}has a solution $(x, y)$ where $x$ and $y$ are both nonzero, find $\frac{{a}}{{b}},$ assuming $b$ is nonzero.
|
||||
If the system of equations: \\begin{{align*}} 6x-4y&=a,\\\\ 6y-9x &=b. \end{{align*}}has a solution $(x, y)$ where $x$ and $y$ are both nonzero, find $\\frac{{a}}{{b}},$ assuming $b$ is nonzero.
|
||||
Solution:
|
||||
If we multiply the first equation by $-\frac{{3}}{{2}}$, we obtain $$6y-9x=-\frac{{3}}{{2}}a.$$Since we also know that $6y-9x=b$, we have $$-\frac{{3}}{{2}}a=b\Rightarrow\frac{{a}}{{b}}=\boxed{{-\frac{{2}}{{3}}}}.$$
|
||||
Final Answer: The final answer is $-\frac{{2}}{{3}}$. I hope it is correct.
|
||||
If we multiply the first equation by $-\\frac{{3}}{{2}}$, we obtain $$6y-9x=-\\frac{{3}}{{2}}a.$$Since we also know that $6y-9x=b$, we have $$-\\frac{{3}}{{2}}a=b\Rightarrow\\frac{{a}}{{b}}=\\boxed{{-\\frac{{2}}{{3}}}}.$$
|
||||
Final Answer: The final answer is $-\\frac{{2}}{{3}}$. I hope it is correct.
|
||||
|
||||
Problem:
|
||||
{problem}
|
||||
@ -40,7 +40,7 @@ Solution:
|
||||
inferencer=dict(type=GenInferencer, max_out_len=512))
|
||||
|
||||
math_eval_cfg = dict(
|
||||
evaluator=dict(type=MATHEvaluator), pred_postprocessor=dict(type='math'))
|
||||
evaluator=dict(type=MATHEvaluator), pred_postprocessor=dict(type=math_postprocess))
|
||||
|
||||
math_datasets = [
|
||||
dict(
|
@ -1,34 +1,34 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import MATHDataset, MATHEvaluator
|
||||
from opencompass.datasets import MATHDataset, MATHEvaluator, math_postprocess
|
||||
|
||||
math_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
type=PromptTemplate,
|
||||
template='''Problem:
|
||||
Find the domain of the expression $\frac{{\sqrt{{x-2}}}}{{\sqrt{{5-x}}}}$.}}
|
||||
Find the domain of the expression $\\frac{{\sqrt{{x-2}}}}{{\sqrt{{5-x}}}}$.}}
|
||||
Solution:
|
||||
The expressions inside each square root must be non-negative. Therefore, $x-2 \ge 0$, so $x\ge2$, and $5 - x \ge 0$, so $x \le 5$. Also, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of the expression is $\boxed{{[2,5)}}$.
|
||||
The expressions inside each square root must be non-negative. Therefore, $x-2 \ge 0$, so $x\ge2$, and $5 - x \ge 0$, so $x \le 5$. Also, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of the expression is $\\boxed{{[2,5)}}$.
|
||||
Final Answer: The final answer is $[2,5)$. I hope it is correct.
|
||||
|
||||
Problem:
|
||||
If $\det \mathbf{{A}} = 2$ and $\det \mathbf{{B}} = 12,$ then find $\det (\mathbf{{A}} \mathbf{{B}}).$
|
||||
Solution:
|
||||
We have that $\det (\mathbf{{A}} \mathbf{{B}}) = (\det \mathbf{{A}})(\det \mathbf{{B}}) = (2)(12) = \boxed{{24}}.$
|
||||
We have that $\det (\mathbf{{A}} \mathbf{{B}}) = (\det \mathbf{{A}})(\det \mathbf{{B}}) = (2)(12) = \\boxed{{24}}.$
|
||||
Final Answer: The final answer is $24$. I hope it is correct.
|
||||
|
||||
Problem:
|
||||
Terrell usually lifts two 20-pound weights 12 times. If he uses two 15-pound weights instead, how many times must Terrell lift them in order to lift the same total weight?
|
||||
Solution:
|
||||
If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\cdot 12\cdot20=480$ pounds of weight. If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\cdot15\cdot n=30n$ pounds of weight. Equating this to 480 pounds, we can solve for $n$: \begin{{align*}} 30n&=480\\ \Rightarrow\qquad n&=480/30=\boxed{{16}} \end{{align*}}
|
||||
If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\cdot 12\cdot20=480$ pounds of weight. If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\cdot15\cdot n=30n$ pounds of weight. Equating this to 480 pounds, we can solve for $n$: \\begin{{align*}} 30n&=480\\\\ \Rightarrow\qquad n&=480/30=\\boxed{{16}} \end{{align*}}
|
||||
Final Answer: The final answer is $16$. I hope it is correct.
|
||||
|
||||
Problem:
|
||||
If the system of equations: \begin{{align*}} 6x-4y&=a,\\ 6y-9x &=b. \end{{align*}}has a solution $(x, y)$ where $x$ and $y$ are both nonzero, find $\frac{{a}}{{b}},$ assuming $b$ is nonzero.
|
||||
If the system of equations: \\begin{{align*}} 6x-4y&=a,\\\\ 6y-9x &=b. \end{{align*}}has a solution $(x, y)$ where $x$ and $y$ are both nonzero, find $\\frac{{a}}{{b}},$ assuming $b$ is nonzero.
|
||||
Solution:
|
||||
If we multiply the first equation by $-\frac{{3}}{{2}}$, we obtain $$6y-9x=-\frac{{3}}{{2}}a.$$Since we also know that $6y-9x=b$, we have $$-\frac{{3}}{{2}}a=b\Rightarrow\frac{{a}}{{b}}=\boxed{{-\frac{{2}}{{3}}}}.$$
|
||||
Final Answer: The final answer is $-\frac{{2}}{{3}}$. I hope it is correct.
|
||||
If we multiply the first equation by $-\\frac{{3}}{{2}}$, we obtain $$6y-9x=-\\frac{{3}}{{2}}a.$$Since we also know that $6y-9x=b$, we have $$-\\frac{{3}}{{2}}a=b\Rightarrow\\frac{{a}}{{b}}=\\boxed{{-\\frac{{2}}{{3}}}}.$$
|
||||
Final Answer: The final answer is $-\\frac{{2}}{{3}}$. I hope it is correct.
|
||||
|
||||
Problem:
|
||||
{problem}Solution:
|
||||
@ -37,7 +37,7 @@ Problem:
|
||||
inferencer=dict(type=GenInferencer, max_out_len=512))
|
||||
|
||||
math_eval_cfg = dict(
|
||||
evaluator=dict(type=MATHEvaluator), pred_postprocessor=dict(type='math'))
|
||||
evaluator=dict(type=MATHEvaluator), pred_postprocessor=dict(type=math_postprocess))
|
||||
|
||||
math_datasets = [
|
||||
dict(
|
@ -4,7 +4,7 @@ from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import MBPPDataset, MBPPEvaluator
|
||||
|
||||
mbpp_reader_cfg = dict(
|
||||
input_columns=['text', 'test_list'], output_column='code')
|
||||
input_columns=['text', 'test_list'], output_column='test_list_2')
|
||||
|
||||
mbpp_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
|
@ -4,7 +4,7 @@ from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import MBPPDataset, MBPPEvaluator
|
||||
|
||||
mbpp_reader_cfg = dict(
|
||||
input_columns=['text', 'test_list'], output_column='code')
|
||||
input_columns=['text', 'test_list'], output_column='test_list_2')
|
||||
|
||||
mbpp_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
|
@ -4,7 +4,7 @@ from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import MBPPDataset, MBPPEvaluator
|
||||
|
||||
mbpp_reader_cfg = dict(
|
||||
input_columns=['text', 'test_list'], output_column='code')
|
||||
input_columns=['text', 'test_list'], output_column='test_list_2')
|
||||
|
||||
mbpp_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import FixKRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import MMLUDataset
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
# None of the MMLU datasets on Hugging Face are parsed correctly, so we use our own dataset reader
|
||||
# Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar
|
||||
@ -33,7 +34,7 @@ mmlu_infer_cfg = dict(
|
||||
|
||||
mmlu_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_postprocessor=dict(type='first-capital'))
|
||||
pred_postprocessor=dict(type=first_capital_postprocess))
|
||||
|
||||
mmlu_all_sets = [
|
||||
"college_biology",
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import FixKRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import MMLUDataset
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
# None of the MMLU datasets on Hugging Face are parsed correctly, so we use our own dataset reader
|
||||
# Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar
|
||||
@ -107,7 +108,7 @@ for _name in mmlu_all_sets:
|
||||
|
||||
mmlu_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_postprocessor=dict(type="first-capital"))
|
||||
pred_postprocessor=dict(type=first_capital_postprocess))
|
||||
|
||||
mmlu_datasets.append(
|
||||
dict(
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import FixKRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import MMLUDataset
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
# None of the MMLU datasets on Hugging Face are parsed correctly, so we use our own dataset reader
|
||||
# Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar
|
||||
@ -92,7 +93,7 @@ for _name in mmlu_all_sets:
|
||||
|
||||
mmlu_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_postprocessor=dict(type="first-capital"),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
mmlu_datasets.append(
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import FixKRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import MMLUDataset
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
# None of the MMLU datasets on Hugging Face are parsed correctly, so we use our own dataset reader
|
||||
# Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar
|
||||
@ -107,7 +108,7 @@ for _name in mmlu_all_sets:
|
||||
|
||||
mmlu_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_postprocessor=dict(type="first-capital"))
|
||||
pred_postprocessor=dict(type=first_capital_postprocess))
|
||||
|
||||
mmlu_datasets.append(
|
||||
dict(
|
||||
|
30
configs/datasets/nq/nq_gen_c788f6.py
Normal file
30
configs/datasets/nq/nq_gen_c788f6.py
Normal file
@ -0,0 +1,30 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import NaturalQuestionDataset, NQEvaluator
|
||||
|
||||
nq_reader_cfg = dict(
|
||||
input_columns=['question'], output_column='answer', train_split='test')
|
||||
|
||||
nq_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=dict(
|
||||
round=[
|
||||
dict(role='HUMAN', prompt='Answer these questions, your answer should be as simple as possible, start your answer with the prompt \'The answer is \'.\nQ: {question}?'),
|
||||
dict(role='BOT', prompt='A:'),
|
||||
], )),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer))
|
||||
|
||||
nq_eval_cfg = dict(evaluator=dict(type=NQEvaluator), pred_role="BOT")
|
||||
|
||||
nq_datasets = [
|
||||
dict(
|
||||
type=NaturalQuestionDataset,
|
||||
abbr='nq',
|
||||
path='./data/nq/',
|
||||
reader_cfg=nq_reader_cfg,
|
||||
infer_cfg=nq_infer_cfg,
|
||||
eval_cfg=nq_eval_cfg)
|
||||
]
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import OBQADataset
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
_input_columns = [
|
||||
["question_stem", "A", "B", "C", "D"],
|
||||
@ -54,7 +55,7 @@ for _i in range(2):
|
||||
obqa_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type="first-capital"),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
obqa_datasets[_i]["reader_cfg"] = obqa_reader_cfg
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import piqaDataset_V2
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
piqa_reader_cfg = dict(
|
||||
input_columns=["goal", "sol1", "sol2"],
|
||||
@ -24,7 +25,7 @@ piqa_infer_cfg = dict(
|
||||
piqa_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type="first-capital"),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
piqa_datasets = [
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import RaceDataset
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
race_reader_cfg = dict(
|
||||
input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
|
||||
@ -23,7 +24,7 @@ race_infer_cfg = dict(
|
||||
|
||||
race_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_postprocessor=dict(type='first-capital'),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
pred_role='BOT')
|
||||
|
||||
race_datasets = [
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import RaceDataset
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
race_reader_cfg = dict(
|
||||
input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
|
||||
@ -18,7 +19,7 @@ race_infer_cfg = dict(
|
||||
|
||||
race_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_postprocessor=dict(type='first-capital'))
|
||||
pred_postprocessor=dict(type=first_capital_postprocess))
|
||||
|
||||
race_datasets = [
|
||||
dict(
|
||||
|
@ -1,4 +1,4 @@
|
||||
from mmengine.config import read_base
|
||||
|
||||
with read_base():
|
||||
from .race_ppl_ab8734 import race_datasets # noqa: F401, F403
|
||||
from .race_ppl_a138cd import race_datasets # noqa: F401, F403
|
||||
|
@ -18,10 +18,9 @@ race_infer_cfg = dict(
|
||||
prompt=
|
||||
"Read the article, and answer the question by replying A, B, C or D.\n\nArticle:\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}"
|
||||
),
|
||||
dict(role="BOT", prompt=ans_token),
|
||||
dict(role="BOT", prompt=f'A: {ans}'),
|
||||
])
|
||||
for ans, ans_token in [["A", "{A}"], ["B", "{B}"], ["C", "{C}"],
|
||||
["D", "{D}"]]
|
||||
for ans in ['A', 'B', 'C', 'D']
|
||||
}),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=PPLInferencer))
|
@ -27,8 +27,9 @@ realtoxicprompts_eval_cfg = dict(
|
||||
|
||||
realtoxicprompts_datasets = [
|
||||
dict(
|
||||
abbr='real-toxicity-prompts',
|
||||
type=RealToxicPromptsDataset,
|
||||
path='allenai/real-toxicity-prompts',
|
||||
path='data/realtoxicprompts/realtoxicprompts_train.arrow',
|
||||
challenging_subset=True,
|
||||
reader_cfg=realtoxicprompts_reader_cfg,
|
||||
infer_cfg=realtoxicprompts_infer_cfg,
|
||||
|
@ -25,8 +25,9 @@ realtoxicprompts_eval_cfg = dict(
|
||||
|
||||
realtoxicprompts_datasets = [
|
||||
dict(
|
||||
abbr='real-toxicity-prompts',
|
||||
type=RealToxicPromptsDataset,
|
||||
path='allenai/real-toxicity-prompts',
|
||||
path='data/realtoxicprompts/realtoxicprompts_train.arrow',
|
||||
challenging_subset=True,
|
||||
reader_cfg=realtoxicprompts_reader_cfg,
|
||||
infer_cfg=realtoxicprompts_infer_cfg,
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import siqaDataset_V2
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
siqa_reader_cfg = dict(
|
||||
input_columns=["context", "question", "answerA", "answerB", "answerC"],
|
||||
@ -28,7 +29,7 @@ siqa_infer_cfg = dict(
|
||||
siqa_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type="first-capital"),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
siqa_datasets = [
|
||||
|
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import storyclozeDataset_V2
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
storycloze_reader_cfg = dict(
|
||||
input_columns=["context", "sentence_quiz1", "sentence_quiz2"],
|
||||
@ -27,7 +28,7 @@ storycloze_infer_cfg = dict(
|
||||
storycloze_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role="BOT",
|
||||
pred_postprocessor=dict(type="first-capital"),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
# The original Story Cloze dataset and its repo are no longer maintained.
|
||||
|
@ -1,4 +1,4 @@
|
||||
from mmengine.config import read_base
|
||||
|
||||
with read_base():
|
||||
from .strategyqa_gen_b3ff20 import strategyqa_datasets # noqa: F401, F403
|
||||
from .strategyqa_gen_1180a7 import strategyqa_datasets # noqa: F401, F403
|
||||
|
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import HFDataset
|
||||
from opencompass.datasets import HFDataset, strategyqa_pred_postprocess, strategyqa_dataset_postprocess
|
||||
|
||||
strategyqa_reader_cfg = dict(
|
||||
input_columns=['question'],
|
||||
@ -23,7 +23,7 @@ strategyqa_infer_cfg = dict(
|
||||
dict(
|
||||
role='BOT',
|
||||
prompt=
|
||||
'Hamsters are prey animals. Prey are food for predators. Thus, hamsters provide food for some animals.\nSo the answer is yes'
|
||||
'Hamsters are prey animals. Prey are food for predators. Thus, hamsters provide food for some animals.\nSo the answer is yes\n'
|
||||
),
|
||||
dict(
|
||||
role='HUMAN',
|
||||
@ -33,7 +33,7 @@ strategyqa_infer_cfg = dict(
|
||||
dict(
|
||||
role='BOT',
|
||||
prompt=
|
||||
'Brooke Shields went to Princeton University. Princeton University is about as academically rigorous as the University of Pennsylvania. Thus, Brooke Shields could also succeed at the University of Pennsylvania.\nSo the answer is yes'
|
||||
'Brooke Shields went to Princeton University. Princeton University is about as academically rigorous as the University of Pennsylvania. Thus, Brooke Shields could also succeed at the University of Pennsylvania.\nSo the answer is yes\n'
|
||||
),
|
||||
dict(
|
||||
role='HUMAN',
|
||||
@ -43,7 +43,7 @@ strategyqa_infer_cfg = dict(
|
||||
dict(
|
||||
role='BOT',
|
||||
prompt=
|
||||
'Hydrogen has an atomic number of 1. 1 squared is 1. There are 5 Spice Girls. Thus, Hydrogen\'s atomic number squared is less than 5.\nSo the answer is no'
|
||||
'Hydrogen has an atomic number of 1. 1 squared is 1. There are 5 Spice Girls. Thus, Hydrogen\'s atomic number squared is less than 5.\nSo the answer is no\n'
|
||||
),
|
||||
dict(
|
||||
role='HUMAN',
|
||||
@ -53,7 +53,7 @@ strategyqa_infer_cfg = dict(
|
||||
dict(
|
||||
role='BOT',
|
||||
prompt=
|
||||
'College commencement ceremonies can happen in December, May, and June. December is in the winter, so there can be frost. Thus, there could be frost at some commencements.\nSo the answer is yes'
|
||||
'College commencement ceremonies can happen in December, May, and June. December is in the winter, so there can be frost. Thus, there could be frost at some commencements.\nSo the answer is yes\n'
|
||||
),
|
||||
dict(
|
||||
role='HUMAN',
|
||||
@ -63,7 +63,7 @@ strategyqa_infer_cfg = dict(
|
||||
dict(
|
||||
role='BOT',
|
||||
prompt=
|
||||
'The War in Vietnam was 6 months. The gestation period for a llama is 11 months, which is more than 6 months. Thus, a llama could not give birth twice during the War in Vietnam.\nSo the answer is no'
|
||||
'The War in Vietnam was 6 months. The gestation period for a llama is 11 months, which is more than 6 months. Thus, a llama could not give birth twice during the War in Vietnam.\nSo the answer is no\n'
|
||||
),
|
||||
dict(
|
||||
role='HUMAN',
|
||||
@ -71,7 +71,7 @@ strategyqa_infer_cfg = dict(
|
||||
dict(
|
||||
role='BOT',
|
||||
prompt=
|
||||
'The density of a pear is about 0.6g/cm3, which is less than water. Objects less dense than water float. Thus, a pear would float.\nSo the answer is no'
|
||||
'The density of a pear is about 0.6g/cm3, which is less than water. Objects less dense than water float. Thus, a pear would float.\nSo the answer is no\n'
|
||||
),
|
||||
dict(role='HUMAN', prompt='Question: {question}\nAnswer:'),
|
||||
], )),
|
||||
@ -80,8 +80,8 @@ strategyqa_infer_cfg = dict(
|
||||
|
||||
strategyqa_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_postprocessor=dict(type='strategyqa'),
|
||||
dataset_postprocessor=dict(type='strategyqa_dataset'))
|
||||
pred_postprocessor=dict(type=strategyqa_pred_postprocess),
|
||||
dataset_postprocessor=dict(type=strategyqa_dataset_postprocess))
|
||||
|
||||
strategyqa_datasets = [
|
||||
dict(
|
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import HFDataset
|
||||
from opencompass.datasets import HFDataset, strategyqa_pred_postprocess, strategyqa_dataset_postprocess
|
||||
|
||||
strategyqa_reader_cfg = dict(
|
||||
input_columns=['question'],
|
||||
@ -44,8 +44,8 @@ Q: {question}{answer}
|
||||
|
||||
strategyqa_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_postprocessor=dict(type='strategyqa'),
|
||||
dataset_postprocessor=dict(type='strategyqa_dataset'))
|
||||
pred_postprocessor=dict(type=strategyqa_pred_postprocess),
|
||||
dataset_postprocessor=dict(type=strategyqa_dataset_postprocess))
|
||||
|
||||
strategyqa_datasets = [
|
||||
dict(
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user