Mirror of https://github.com/open-compass/opencompass.git, synced 2025-05-30 16:03:24 +08:00
Add Release Contribution
This commit is contained in:
parent 36f111100f
commit cbe9fe2cdb
69 .pre-commit-config.yaml Normal file
@@ -0,0 +1,69 @@
exclude: |
    (?x)^(
        tests/data/|
        opencompass/models/internal/|
        opencompass/utils/internal/|
        configs/
    )
repos:
  - repo: https://github.com/PyCQA/flake8
    rev: 5.0.4
    hooks:
      - id: flake8
  - repo: https://github.com/PyCQA/isort
    rev: 5.11.5
    hooks:
      - id: isort
  - repo: https://github.com/pre-commit/mirrors-yapf
    rev: v0.32.0
    hooks:
      - id: yapf
  - repo: https://github.com/codespell-project/codespell
    rev: v2.2.1
    hooks:
      - id: codespell
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.3.0
    hooks:
      - id: trailing-whitespace
        exclude: |
          (?x)^(
              dicts/|
              projects/.*?/dicts/
          )
      - id: check-yaml
      - id: end-of-file-fixer
        exclude: |
          (?x)^(
              dicts/|
              projects/.*?/dicts/
          )
      - id: requirements-txt-fixer
      - id: double-quote-string-fixer
      - id: check-merge-conflict
      - id: fix-encoding-pragma
        args: ["--remove"]
      - id: mixed-line-ending
        args: ["--fix=lf"]
      - id: mixed-line-ending
        args: ["--fix=lf"]
  - repo: https://github.com/executablebooks/mdformat
    rev: 0.7.9
    hooks:
      - id: mdformat
        args: ["--number", "--table-width", "200"]
        additional_dependencies:
          - mdformat-openmmlab
          - mdformat_frontmatter
          - linkify-it-py
  - repo: https://github.com/myint/docformatter
    rev: v1.3.1
    hooks:
      - id: docformatter
        args: ["--in-place", "--wrap-descriptions", "79"]
# - repo: https://github.com/open-mmlab/pre-commit-hooks
#   rev: v0.2.0  # Use the ref you want to point at
#   hooks:
#     - id: check-algo-readme
#     - id: check-copyright
#       args: ["mmocr", "tests", "tools"]  # these directories will be checked

4 configs/datasets/ARC_e/ARC_e_ppl.py Normal file
@@ -0,0 +1,4 @@
from mmengine.config import read_base

with read_base():
    from .ARC_e_ppl_f86898 import ARC_e_datasets  # noqa: F401, F403

37 configs/datasets/CLUE_C3/CLUE_C3_ppl_588820.py Normal file
@@ -0,0 +1,37 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import C3Dataset

C3_reader_cfg = dict(
    input_columns=[
        'question', 'content', 'choice0', 'choice1', 'choice2', 'choice3',
        'choices'
    ],
    output_column='label')

C3_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template={
            i: dict(round=[
                dict(role="HUMAN", prompt="文章:{content}\n问题:{question}"),
                dict(role="BOT", prompt=f"答案:{{choice{i}}}")
            ])
            for i in range(4)
        }),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=PPLInferencer))

C3_eval_cfg = dict(evaluator=dict(type=AccEvaluator))

C3_datasets = [
    dict(
        type=C3Dataset,
        abbr='C3',
        path='./data/CLUE/C3/dev_0.json',
        reader_cfg=C3_reader_cfg,
        infer_cfg=C3_infer_cfg,
        eval_cfg=C3_eval_cfg)
]

34 configs/datasets/CLUE_CMRC/CLUE_CMRC_gen_72a8d5.py Normal file
@@ -0,0 +1,34 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator
from opencompass.datasets import CMRCDataset

CMRC_reader_cfg = dict(
    input_columns=['question', 'context'], output_column='answers')

CMRC_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(round=[
            dict(
                role="HUMAN",
                prompt="文章:{context}\n根据上文,回答如下问题:\n{question}\n答:"),
        ])),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer))

CMRC_eval_cfg = dict(
    evaluator=dict(type=EMEvaluator),
    pred_role="BOT",
)

CMRC_datasets = [
    dict(
        type=CMRCDataset,
        abbr='CMRC_dev',
        path='./data/CLUE/CMRC/dev.json',
        reader_cfg=CMRC_reader_cfg,
        infer_cfg=CMRC_infer_cfg,
        eval_cfg=CMRC_eval_cfg),
]

33 configs/datasets/CLUE_CMRC/CLUE_CMRC_gen_d7096f.py Normal file
@@ -0,0 +1,33 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator
from opencompass.datasets import CMRCDataset

CMRC_reader_cfg = dict(
    input_columns=['question', 'context'], output_column='answers')

CMRC_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(round=[
            dict(role="HUMAN", prompt="文章:{context}\n根据上文,回答如下问题:{question}"),
            dict(role="BOT", prompt="答:"),
        ])),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer))

CMRC_eval_cfg = dict(
    evaluator=dict(type=EMEvaluator),
    pred_role="BOT",
)

CMRC_datasets = [
    dict(
        type=CMRCDataset,
        abbr='CMRC_dev',
        path='./data/CLUE/CMRC/dev.json',
        reader_cfg=CMRC_reader_cfg,
        infer_cfg=CMRC_infer_cfg,
        eval_cfg=CMRC_eval_cfg),
]

34 configs/datasets/CLUE_DRCD/CLUE_DRCD_gen_03b96b.py Normal file
@@ -0,0 +1,34 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator
from opencompass.datasets import DRCDDataset

DRCD_reader_cfg = dict(
    input_columns=['question', 'context'], output_column='answers')

DRCD_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(round=[
            dict(
                role="HUMAN",
                prompt="文章:{context}\n根据上文,回答如下问题:\n{question}\n答:"),
        ])),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer))

DRCD_eval_cfg = dict(
    evaluator=dict(type=EMEvaluator),
    pred_role="BOT",
)

DRCD_datasets = [
    dict(
        type=DRCDDataset,
        abbr='DRCD_dev',
        path='./data/CLUE/DRCD/dev.json',
        reader_cfg=DRCD_reader_cfg,
        infer_cfg=DRCD_infer_cfg,
        eval_cfg=DRCD_eval_cfg),
]

50 configs/datasets/FewCLUE_bustm/FewCLUE_bustm_gen_305431.py Normal file
@@ -0,0 +1,50 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import AFQMCDataset_V2

bustm_reader_cfg = dict(
    input_columns=["sentence1", "sentence2"],
    output_column="label",
    test_split="train")

bustm_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(round=[
            dict(
                role="HUMAN",
                prompt=
                "语句一:“{sentence1}”\n语句二:“{sentence2}”\n请判断语句一和语句二说的是否是一个意思?\nA. 无关\nB. 相关\n请从“A”,“B”中进行选择。\n答:",
            ),
        ]),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer),
)

bustm_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
    pred_postprocessor=dict(type="first-capital"),
)

bustm_datasets = [
    dict(
        abbr="bustm-dev",
        type=AFQMCDataset_V2,  # bustm shares the same format as AFQMC
        path="./data/FewCLUE/bustm/dev_few_all.json",
        reader_cfg=bustm_reader_cfg,
        infer_cfg=bustm_infer_cfg,
        eval_cfg=bustm_eval_cfg,
    ),
    dict(
        abbr="bustm-test",
        type=AFQMCDataset_V2,  # bustm shares the same format as AFQMC
        path="./data/FewCLUE/bustm/test_public.json",
        reader_cfg=bustm_reader_cfg,
        infer_cfg=bustm_infer_cfg,
        eval_cfg=bustm_eval_cfg,
    ),
]

45 configs/datasets/FewCLUE_chid/FewCLUE_chid_ppl_b6cd88.py Normal file
@@ -0,0 +1,45 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CHIDDataset

chid_reader_cfg = dict(
    input_columns=[f'content{i}' for i in range(7)], output_column='answer')

chid_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template={
            i: dict(
                round=[
                    dict(role="HUMAN", prompt=f"以下句子是否通顺?\n{{content{i}}}"),
                    dict(role="BOT", prompt="这个句子是通顺的。"),
                ], )
            for i in range(7)
        }),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=PPLInferencer))

chid_eval_cfg = dict(evaluator=dict(type=AccEvaluator), pred_role="BOT")

chid_datasets = [
    dict(
        type=CHIDDataset,
        path='json',
        abbr='chid-dev',
        data_files='./data/FewCLUE/chid/dev_few_all.json',
        split='train',
        reader_cfg=chid_reader_cfg,
        infer_cfg=chid_infer_cfg,
        eval_cfg=chid_eval_cfg),
    dict(
        type=CHIDDataset,
        path='json',
        abbr='chid-test',
        data_files='./data/FewCLUE/chid/test_public.json',
        split='train',
        reader_cfg=chid_reader_cfg,
        infer_cfg=chid_infer_cfg,
        eval_cfg=chid_eval_cfg),
]

@@ -0,0 +1,58 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CluewscDataset

cluewsc_reader_cfg = dict(
    input_columns=['span1', 'span2', 'text', 'new_text'],
    output_column='answer')

cluewsc_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template={
            0:
            dict(round=[
                dict(
                    role="HUMAN",
                    prompt=
                    "{text}\nHere, is the pronoun \"{span2}\" used to mean \"{span1}\"?"
                ),
                dict(role="BOT", prompt="No.")
            ]),
            1:
            dict(round=[
                dict(
                    role="HUMAN",
                    prompt=
                    "{text}\nHere, is the pronoun \"{span2}\" used to mean \"{span1}\"?"
                ),
                dict(role="BOT", prompt="Yes.")
            ]),
        }),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=PPLInferencer))

cluewsc_eval_cfg = dict(evaluator=dict(type=AccEvaluator))

cluewsc_datasets = [
    dict(
        type=CluewscDataset,
        path='json',
        abbr='cluewsc-dev',
        data_files='./data/FewCLUE/cluewsc/dev_few_all.json',
        split='train',
        reader_cfg=cluewsc_reader_cfg,
        infer_cfg=cluewsc_infer_cfg,
        eval_cfg=cluewsc_eval_cfg),
    dict(
        type=CluewscDataset,
        path='json',
        abbr='cluewsc-test',
        data_files='./data/FewCLUE/cluewsc/test_public.json',
        split='train',
        reader_cfg=cluewsc_reader_cfg,
        infer_cfg=cluewsc_infer_cfg,
        eval_cfg=cluewsc_eval_cfg),
]

48 configs/datasets/FewCLUE_tnews/FewCLUE_tnews_ppl_784b9e.py Normal file
@@ -0,0 +1,48 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import TNewsDataset

tnews_reader_cfg = dict(input_columns='sentence', output_column='label_desc2')

tnews_labels = [
    '农业新闻', '旅游新闻', '游戏新闻', '科技类别公司新闻', '体育类别新闻', '初升高教育新闻', '娱乐圈新闻', '投资资讯',
    '军事类别常识', '车辆新闻', '楼市新闻', '环球不含中国类别新闻', '书籍文化历史类别新闻', '故事类别新闻', '股票市场类别新闻'
]

tnews_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template={
            lb: dict(round=[
                dict(role='HUMAN', prompt='{sentence}\n上述内容属于什么新闻?'),
                dict(role='BOT', prompt=lb)
            ])
            for lb in tnews_labels
        }),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=PPLInferencer))

tnews_eval_cfg = dict(evaluator=dict(type=AccEvaluator))

tnews_datasets = [
    dict(
        type=TNewsDataset,
        path='json',
        abbr='tnews-dev',
        data_files='./data/FewCLUE/tnews/dev_few_all.json',
        split='train',
        reader_cfg=tnews_reader_cfg,
        infer_cfg=tnews_infer_cfg,
        eval_cfg=tnews_eval_cfg),
    dict(
        type=TNewsDataset,
        path='json',
        abbr='tnews-test',
        data_files='./data/FewCLUE/tnews/test_public.json',
        split='train',
        reader_cfg=tnews_reader_cfg,
        infer_cfg=tnews_infer_cfg,
        eval_cfg=tnews_eval_cfg)
]

4 configs/datasets/GaokaoBench/GaokaoBench_gen.py Normal file
@@ -0,0 +1,4 @@
from mmengine.config import read_base

with read_base():
    from .GaokaoBench_gen_aed980 import GaokaoBench_datasets  # noqa: F401, F403

42 configs/datasets/SuperGLUE_AX_g/SuperGLUE_AX_g_gen_7a5dee.py Normal file
@@ -0,0 +1,42 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import AXDataset_V2

AX_g_reader_cfg = dict(
    input_columns=["hypothesis", "premise"],
    output_column="label",
)

AX_g_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(round=[
            dict(
                role="HUMAN",
                prompt=
                "{premise}\n{hypothesis}\nIs the sentence below entailed by the sentence above?\nA. Yes\nB. No\nAnswer:"
            ),
        ]),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer),
)

AX_g_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
    pred_postprocessor=dict(type="first-capital"),
)

AX_g_datasets = [
    dict(
        abbr="AX_g",
        type=AXDataset_V2,
        path="./data/SuperGLUE/AX-g/AX-g.jsonl",
        reader_cfg=AX_g_reader_cfg,
        infer_cfg=AX_g_infer_cfg,
        eval_cfg=AX_g_eval_cfg,
    )
]

53 configs/datasets/SuperGLUE_AX_g/SuperGLUE_AX_g_ppl_8d9bf9.py Normal file
@@ -0,0 +1,53 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset

AX_g_reader_cfg = dict(
    input_columns=["hypothesis", "premise"],
    output_column="label",
    test_split="train")

AX_g_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template={
            "entailment":
            dict(round=[
                dict(
                    role="HUMAN",
                    prompt=
                    "{premise}\n{hypothesis}\nIs the sentence below entailed by the sentence above?"
                ),
                dict(role="BOT", prompt="Yes"),
            ]),
            "not_entailment":
            dict(round=[
                dict(
                    role="HUMAN",
                    prompt=
                    "{premise}\n{hypothesis}\nIs the sentence below entailed by the sentence above?"
                ),
                dict(role="BOT", prompt="No"),
            ])
        },
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=PPLInferencer),
)

AX_g_eval_cfg = dict(evaluator=dict(type=AccEvaluator))

AX_g_datasets = [
    dict(
        type=HFDataset,
        abbr="AX_g",
        path="json",
        data_files="./data/SuperGLUE/AX-g/AX-g.jsonl",
        split="train",
        reader_cfg=AX_g_reader_cfg,
        infer_cfg=AX_g_infer_cfg,
        eval_cfg=AX_g_eval_cfg,
    )
]

4 configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl.py Normal file
@@ -0,0 +1,4 @@
from mmengine.config import read_base

with read_base():
    from .SuperGLUE_BoolQ_ppl_f80fb0 import BoolQ_datasets  # noqa: F401, F403

4 configs/datasets/SuperGLUE_CB/SuperGLUE_CB_gen.py Normal file
@@ -0,0 +1,4 @@
from mmengine.config import read_base

with read_base():
    from .SuperGLUE_CB_gen_bb97e1 import CB_datasets  # noqa: F401, F403

43 configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_gen_6d5e67.py Normal file
@@ -0,0 +1,43 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import COPADataset_V2

COPA_reader_cfg = dict(
    input_columns=["question", "premise", "choice1", "choice2"],
    output_column="label",
)

COPA_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(
                    role="HUMAN",
                    prompt=
                    "{premise}\nQuestion: Which may be the {question}?\nA. {choice1}\nB. {choice2}\nAnswer:"
                ),
            ], ),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer),
)

COPA_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
    pred_postprocessor=dict(type="first-capital"),
)

COPA_datasets = [
    dict(
        abbr="COPA",
        type=COPADataset_V2,
        path="./data/SuperGLUE/COPA/val.jsonl",
        reader_cfg=COPA_reader_cfg,
        infer_cfg=COPA_infer_cfg,
        eval_cfg=COPA_eval_cfg,
    )
]

@@ -0,0 +1,42 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import MultiRCDataset_V2

MultiRC_reader_cfg = dict(
    input_columns=["question", "text", "answer"],
    output_column="label",
)

MultiRC_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(round=[
            dict(
                role="HUMAN",
                prompt=
                "{text}\nQuestion: {question}\nAnswer: {answer}\nIs it true?\nA. Yes\nB. No\nAnswer:"
            ),
        ]),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer),
)

MultiRC_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
    pred_postprocessor=dict(type="first-capital"),
)

MultiRC_datasets = [
    dict(
        abbr="MultiRC",
        type=MultiRCDataset_V2,
        path="./data/SuperGLUE/MultiRC/val.jsonl",
        reader_cfg=MultiRC_reader_cfg,
        infer_cfg=MultiRC_infer_cfg,
        eval_cfg=MultiRC_eval_cfg,
    )
]

46 configs/datasets/SuperGLUE_WiC/SuperGLUE_WiC_gen_c39367.py Normal file
@@ -0,0 +1,46 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import WiCDataset_V2

WiC_reader_cfg = dict(
    input_columns=[
        "word",
        "sentence1",
        "sentence2",
    ],
    output_column="label",
)

WiC_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(round=[
            dict(
                role="HUMAN",
                prompt=
                "Sentence 1: {sentence1}\nSentence 2: {sentence2}\nAre '{word}' in the above two sentenses the same?\nA. Yes\nB. No\nAnswer:"
            ),
        ]),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer),
)

WiC_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
    pred_postprocessor=dict(type="first-capital"),
)

WiC_datasets = [
    dict(
        abbr="WiC",
        type=WiCDataset_V2,
        path="./data/SuperGLUE/WiC/val.jsonl",
        reader_cfg=WiC_reader_cfg,
        infer_cfg=WiC_infer_cfg,
        eval_cfg=WiC_eval_cfg,
    )
]

55 configs/datasets/SuperGLUE_WiC/SuperGLUE_WiC_ppl_4118db.py Normal file
@@ -0,0 +1,55 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import WiCDataset

WiC_reader_cfg = dict(
    input_columns=[
        "word",
        "sentence1",
        "sentence2",
    ],
    output_column="answer",
    test_split="train")

WiC_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template={
            0:
            dict(round=[
                dict(
                    role="HUMAN",
                    prompt=
                    "Sentence 1: {sentence1}\nSentence 2: {sentence2}\n'{word}' in the above two sentenses are different."
                ),
            ]),
            1:
            dict(round=[
                dict(
                    role="HUMAN",
                    prompt=
                    "Sentence 1: {sentence1}\nSentence 2: {sentence2}\n'{word}' in the above two sentenses are the same."
                ),
            ]),
        },
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=PPLInferencer),
)

WiC_eval_cfg = dict(evaluator=dict(type=AccEvaluator))

WiC_datasets = [
    dict(
        type=WiCDataset,
        abbr="WiC",
        path="json",
        data_files="./data/SuperGLUE/WiC/val.jsonl",
        split="train",
        reader_cfg=WiC_reader_cfg,
        infer_cfg=WiC_infer_cfg,
        eval_cfg=WiC_eval_cfg,
    )
]

49 configs/datasets/SuperGLUE_WiC/SuperGLUE_WiC_ppl_d316eb.py Normal file
@@ -0,0 +1,49 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import WiCDataset

WiC_reader_cfg = dict(
    input_columns=[
        "word",
        "sentence1",
        "sentence2",
    ],
    output_column="answer",
    test_split="train")

WiC_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template={
            0:
            dict(round=[
                dict(
                    role="HUMAN",
                    prompt="{word} in {sentence1} and {sentence2} is different."),
            ]),
            1:
            dict(round=[
                dict(role="HUMAN", prompt="{word} in {sentence1} and {sentence2} is same."),
            ]),
        },
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=PPLInferencer),
)

WiC_eval_cfg = dict(evaluator=dict(type=AccEvaluator))

WiC_datasets = [
    dict(
        type=WiCDataset,
        abbr="WiC",
        path="json",
        data_files="./data/SuperGLUE/WiC/val.jsonl",
        split="train",
        reader_cfg=WiC_reader_cfg,
        infer_cfg=WiC_infer_cfg,
        eval_cfg=WiC_eval_cfg,
    )
]

40 configs/datasets/TheoremQA/TheoremQA_gen_74abc9.py Normal file
@@ -0,0 +1,40 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import TheoremQADataset

TheoremQA_reader_cfg = dict(
    input_columns=['Question', 'Answer_type'],
    output_column='Answer',
    train_split='test')

TheoremQA_prompt1 = "Please read a math problem, and then think step by step to derive the answer. The answer is decided by Answer Type. " \
    "If the Answer type in [bool], the answer needs to be True or False. " \
    "Else if the Answer type in [integer, float] , The answer needs to be in numerical form. " \
    "Else if the Answer type in [list of integer, list of float] , the answer needs to be a list of number like [2, 3, 4]. " \
    "Else if the Answer type in [option], the answer needs to be an option like (a), (b), (c), (d)." \
    "You need to output the answer in your final sentence like 'Therefore, the answer is ...'."
TheoremQA_prompt2 = f"Below is an instruction that describes a task, paired with an input that provides further context. " \
    f"Write a response that appropriately completes the request.\n\n### Instruction:\n{TheoremQA_prompt1}\n\n### Input:\n{{Question}}\nAnswer_type:{{Answer_type}}\n### Response:\n"

TheoremQA_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=TheoremQA_prompt2),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer))

TheoremQA_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_postprocessor=dict(type='TheoremQA'))

TheoremQA_datasets = [
    dict(
        abbr='TheoremQA',
        type=TheoremQADataset,
        path="./data/TheoremQA/test.csv",
        reader_cfg=TheoremQA_reader_cfg,
        infer_cfg=TheoremQA_infer_cfg,
        eval_cfg=TheoremQA_eval_cfg)
]

4 configs/datasets/ceval/ceval_gen.py Normal file
@@ -0,0 +1,4 @@
from mmengine.config import read_base

with read_base():
    from .ceval_gen_ee2cb0 import ceval_datasets  # noqa: F401, F403

31 configs/datasets/civilcomments/civilcomments_ppl_e01497.py Normal file
@@ -0,0 +1,31 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import CLPInferencer
from opencompass.openicl.icl_evaluator import AUCROCEvaluator
from opencompass.datasets import CivilCommentsDataset

civilcomments_reader_cfg = dict(
    input_columns=['text'],
    output_column='label',
    train_split='test',
    test_split='test')

civilcomments_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template='Text: {text}\nQuestion: '
        'Does the above text contain rude, hateful, aggressive, disrespectful '
        'or unreasonable language?\nAnswer:'),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=CLPInferencer))

civilcomments_eval_cfg = dict(evaluator=dict(type=AUCROCEvaluator), )

civilcomments_datasets = [
    dict(
        type=CivilCommentsDataset,
        path='civil_comments',
        reader_cfg=civilcomments_reader_cfg,
        infer_cfg=civilcomments_infer_cfg,
        eval_cfg=civilcomments_eval_cfg)
]

57 configs/datasets/collections/chat_medium.py Normal file
@@ -0,0 +1,57 @@
from mmengine.config import read_base

with read_base():
    from ..mmlu.mmlu_gen_a568f1 import mmlu_datasets
    from ..ceval.ceval_gen_ee2cb0 import ceval_datasets
    from ..agieval.agieval_gen_dc7dae import agieval_datasets
    from ..GaokaoBench.GaokaoBench_gen_aed980 import GaokaoBench_datasets
    from ..bbh.bbh_gen_58abc3 import bbh_datasets
    from ..humaneval.humaneval_gen_d428f1 import humaneval_datasets
    from ..mbpp.mbpp_gen_4104e4 import mbpp_datasets
    from ..CLUE_C3.CLUE_C3_gen_9e3de9 import C3_datasets
    from ..CLUE_CMRC.CLUE_CMRC_gen_72a8d5 import CMRC_datasets
    from ..CLUE_DRCD.CLUE_DRCD_gen_03b96b import DRCD_datasets
    from ..CLUE_afqmc.CLUE_afqmc_gen_db509b import afqmc_datasets
    from ..CLUE_cmnli.CLUE_cmnli_gen_316313 import cmnli_datasets
    from ..CLUE_ocnli.CLUE_ocnli_gen_7c44b0 import ocnli_datasets
    from ..FewCLUE_bustm.FewCLUE_bustm_gen_305431 import bustm_datasets
    from ..FewCLUE_chid.FewCLUE_chid_gen_686c63 import chid_datasets
    from ..FewCLUE_cluewsc.FewCLUE_cluewsc_gen_276956 import cluewsc_datasets
    from ..FewCLUE_csl.FewCLUE_csl_gen_1b0c02 import csl_datasets
    from ..FewCLUE_eprstmt.FewCLUE_eprstmt_gen_d6d06d import eprstmt_datasets
    from ..FewCLUE_ocnli_fc.FewCLUE_ocnli_fc_gen_bef37f import ocnli_fc_datasets
    from ..FewCLUE_tnews.FewCLUE_tnews_gen_8d59ba import tnews_datasets
    from ..lcsts.lcsts_gen_427fde import lcsts_datasets
    from ..lambada.lambada_gen_7ffe3d import lambada_datasets
    from ..storycloze.storycloze_gen_c5a230 import storycloze_datasets
    from ..SuperGLUE_AX_b.SuperGLUE_AX_b_gen_477186 import AX_b_datasets
    from ..SuperGLUE_AX_g.SuperGLUE_AX_g_gen_7a5dee import AX_g_datasets
    from ..SuperGLUE_BoolQ.SuperGLUE_BoolQ_gen_8525d1 import BoolQ_datasets
    from ..SuperGLUE_CB.SuperGLUE_CB_gen_bb97e1 import CB_datasets
    from ..SuperGLUE_COPA.SuperGLUE_COPA_gen_6d5e67 import COPA_datasets
    from ..SuperGLUE_MultiRC.SuperGLUE_MultiRC_gen_26c9dc import MultiRC_datasets
    from ..SuperGLUE_RTE.SuperGLUE_RTE_gen_ce346a import RTE_datasets
    from ..SuperGLUE_ReCoRD.SuperGLUE_ReCoRD_gen_d8f19c import ReCoRD_datasets
    from ..SuperGLUE_WiC.SuperGLUE_WiC_gen_c39367 import WiC_datasets
    from ..SuperGLUE_WSC.SuperGLUE_WSC_gen_d8d441 import WSC_datasets
    from ..race.race_gen_12de48 import race_datasets
    from ..Xsum.Xsum_gen_d2126e import Xsum_datasets
    from ..gsm8k.gsm8k_gen_2dd372 import gsm8k_datasets
    from ..summedits.summedits_gen_4f35b5 import summedits_datasets
    from ..math.math_gen_78bcba import math_datasets
    from ..TheoremQA.TheoremQA_gen_891fcf import TheoremQA_datasets
    from ..hellaswag.hellaswag_gen_cae9cb import hellaswag_datasets
    from ..ARC_e.ARC_e_gen_0a29bf import ARC_e_datasets
    from ..ARC_c.ARC_c_gen_3f3039 import ARC_c_datasets
    from ..commonsenseqa.commonsenseqa_gen_a58dbd import commonsenseqa_datasets
    from ..piqa.piqa_gen_8287ae import piqa_datasets
    from ..siqa.siqa_gen_a3c714 import siqa_datasets
    from ..strategyqa.strategyqa_gen_be3f8d import strategyqa_datasets
    from ..winogrande.winogrande_gen_c19d87 import winogrande_datasets
    from ..obqa.obqa_gen_b2cde9 import obqa_datasets
    from ..nq.nq_gen_a6ffca import nq_datasets
    from ..triviaqa.triviaqa_gen_cc3cbf import triviaqa_datasets
    from ..flores.flores_gen_8eb9ca import flores_datasets
    from ..crowspairs.crowspairs_gen_dd110a import crowspairs_datasets

datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])

60 configs/datasets/commonsenseqa/commonsenseqa_gen_a58dbd.py Normal file
@@ -0,0 +1,60 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import MDLRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import commonsenseqaDataset

commonsenseqa_reader_cfg = dict(
    input_columns=["question", "A", "B", "C", "D", "E"],
    output_column="answerKey",
    test_split="validation")

_ice_template = dict(
    type=PromptTemplate,
    template=dict(
        begin="</E>",
        round=[
            dict(
                role="HUMAN",
                prompt=
                "{question}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nE. {E}\nAnswer:",
            ),
            dict(
                role="BOT",
                prompt="{answerKey}",
            ),
        ],
    ),
    ice_token="</E>",
)

commonsenseqa_infer_cfg = dict(
    ice_template=_ice_template,
    retriever=dict(
        type=MDLRetriever,
        ice_num=8,
        candidate_num=30,
        select_time=10,
        seed=1,
        batch_size=12,
        ice_template=_ice_template,
    ),
    inferencer=dict(type=GenInferencer),
)

commonsenseqa_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_postprocessor=dict(type="first-capital"),
)

commonsenseqa_datasets = [
    dict(
        type=commonsenseqaDataset,
        path="commonsense_qa",
        reader_cfg=commonsenseqa_reader_cfg,
        infer_cfg=commonsenseqa_infer_cfg,
        eval_cfg=commonsenseqa_eval_cfg,
    )
]

del _ice_template

40 configs/datasets/glm/C3.py Normal file
@@ -0,0 +1,40 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import C3Dataset

C3_reader_cfg = dict(
    input_columns=[
        'question', 'content', 'choice0', 'choice1', 'choice2', 'choice3',
        'choices'
    ],
    output_column='label')

C3_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template={
            0:
            "阅读以下内容,选择合适的选项回答: {content} 问题:{question}\n 选项: -{choice0} -{choice1} -{choice2} -{choice3} 答: [MASK]-{choice0}",
            1:
            "阅读以下内容,选择合适的选项回答: {content} 问题:{question}\n 选项: -{choice0} -{choice1} -{choice2} -{choice3} 答: [MASK]-{choice1}",
            2:
            "阅读以下内容,选择合适的选项回答: {content} 问题:{question}\n 选项: -{choice0} -{choice1} -{choice2} -{choice3} 答: [MASK]-{choice2}",
            3:
            "阅读以下内容,选择合适的选项回答: {content} 问题:{question}\n 选项: -{choice0} -{choice1} -{choice2} -{choice3} 答: [MASK]-{choice3}",
        }),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=PPLInferencer))

C3_eval_cfg = dict(evaluator=dict(type=AccEvaluator))

C3_datasets = [
    dict(
        type=C3Dataset,
        abbr='C3',
        path='./data/CLUE/C3/dev_0.json',
        reader_cfg=C3_reader_cfg,
        infer_cfg=C3_infer_cfg,
        eval_cfg=C3_eval_cfg)
]

41 configs/datasets/glm/tnews.py Normal file
@@ -0,0 +1,41 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GLMChoiceInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import TNewsDataset

tnews_reader_cfg = dict(input_columns='sentence', output_column='label_desc2')

tnews_labels = [
    '农业新闻', '旅游新闻', '游戏新闻', '科技类别公司新闻', '体育类别新闻', '初升高教育新闻', '娱乐圈新闻', '投资资讯',
    '军事类别常识', '车辆新闻', '楼市新闻', '环球不含中国类别新闻', '书籍文化历史类别新闻', '故事类别新闻', '股票市场类别新闻'
]

tnews_infer_cfg = dict(
    ice_template=dict(
        type=PromptTemplate,
        template={lb: f'</E></S>这篇新闻属于:{lb}'
                  for lb in tnews_labels},
        column_token_map={'sentence': '</S>'},
        ice_token='</E>'),
    prompt_template=dict(
        type=PromptTemplate,
        template='</E></S>\n以上这篇新闻属于',
        column_token_map={'sentence': '</S>'},
        ice_token='</E>'),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GLMChoiceInferencer, choices=tnews_labels))

tnews_eval_cfg = dict(evaluator=dict(type=AccEvaluator))

tnews_datasets = [
    dict(
        type=TNewsDataset,
        path='json',
        abbr='tnews',
        data_files='./data/FewCLUE/tnews/test_public.json',
        split='train',
        reader_cfg=tnews_reader_cfg,
        infer_cfg=tnews_infer_cfg,
        eval_cfg=tnews_eval_cfg)
]

4 configs/datasets/govrepcrs/govrepcrs_gen.py Normal file
@@ -0,0 +1,4 @@
from mmengine.config import read_base

with read_base():
    from .govrepcrs_gen_455586 import govrepcrs_datasets  # noqa: F401, F403

47 configs/datasets/govrepcrs/govrepcrs_gen_455586.py Normal file
@@ -0,0 +1,47 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import BleuEvaluator
from opencompass.datasets import GovRepcrsDataset

govrepcrs_reader_cfg = dict(
    input_columns='content',
    output_column='summary',
    train_split='test',
    test_split='test')

govrepcrs_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            begin=[
                dict(
                    role='SYSTEM',
                    fallback_role="HUMAN",
                    prompt=
                    'Please summarize the following English report in English:'
                ),
            ],
            round=[
                dict(role='HUMAN', prompt='{content}'),
                dict(role='BOT', prompt='{summary}'),
            ])),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(
        type=GenInferencer, batch_size=4, max_out_len=500, max_seq_len=8192))

govrepcrs_eval_cfg = dict(
    evaluator=dict(type=BleuEvaluator),
    pred_role='BOT',
    pred_postprocessor=dict(type='general_cn'),
    dataset_postprocessor=dict(type='general_cn'))

govrepcrs_datasets = [
    dict(
        type=GovRepcrsDataset,
        path='./data/govrep/',
        abbr='GovRepcrs',
        reader_cfg=govrepcrs_reader_cfg,
        infer_cfg=govrepcrs_infer_cfg,
        eval_cfg=govrepcrs_eval_cfg)
]

43 configs/datasets/hellaswag/hellaswag_gen_cae9cb.py Normal file
@@ -0,0 +1,43 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import hellaswagDataset_V2

hellaswag_reader_cfg = dict(
    input_columns=["ctx", "A", "B", "C", "D"],
    output_column="label",
    test_split="validation")

hellaswag_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(round=[
            dict(
                role="HUMAN",
                prompt=(
                    "{ctx}\nQuestion: Which ending makes the most sense?\n"
                    "A. {A}\nB. {B}\nC. {C}\nD. {D}\n"
                    "You may choose from 'A', 'B', 'C', 'D'.\n"
                    "Answer:"),
            ),
        ]),
    ),
    retriever=dict(type=ZeroRetriever, ),
    inferencer=dict(type=GenInferencer),
)

hellaswag_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
    pred_postprocessor=dict(type="first-capital"),
)

hellaswag_datasets = [
    dict(
        type=hellaswagDataset_V2,
        path="hellaswag",
        reader_cfg=hellaswag_reader_cfg,
        infer_cfg=hellaswag_infer_cfg,
        eval_cfg=hellaswag_eval_cfg)
]

38 configs/datasets/iwslt2017/iwslt2017_gen_02ea0b.py Normal file
@@ -0,0 +1,38 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import BM25Retriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import BleuEvaluator
from opencompass.datasets import IWSLT2017Dataset

iwslt2017_reader_cfg = dict(
    input_columns='en', output_column='de', train_split='validation')

iwslt2017_infer_cfg = dict(
    ice_template=dict(
        type=PromptTemplate,
        template=dict(
            begin='</E>',
            round=[
                dict(role='HUMAN', prompt='Please translate the following English statements to German:\n{en}'),
                dict(role='BOT', prompt='{de}'),
            ]
        ),
        ice_token='</E>'),
    retriever=dict(type=BM25Retriever, ice_num=1),
    inferencer=dict(type=GenInferencer))

iwslt2017_eval_cfg = dict(
    evaluator=dict(type=BleuEvaluator),
    pred_role='BOT',
    pred_postprocessor=dict(type='general_cn'),
    dataset_postprocessor=dict(type='general_cn'))

iwslt2017_datasets = [
    dict(
        type=IWSLT2017Dataset,
        path='iwslt2017',
        name='iwslt2017-en-de',
        reader_cfg=iwslt2017_reader_cfg,
        infer_cfg=iwslt2017_infer_cfg,
        eval_cfg=iwslt2017_eval_cfg)
]

4 configs/datasets/lcsts/lcsts_gen.py Normal file
@@ -0,0 +1,4 @@
from mmengine.config import read_base

with read_base():
    from .lcsts_gen_427fde import lcsts_datasets  # noqa: F401, F403

64 configs/datasets/mbpp/mbpp_gen_b60b47.py Normal file
@@ -0,0 +1,64 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import MBPPDataset, MBPPEvaluator

mbpp_reader_cfg = dict(
    input_columns=['text', 'test_list'], output_column='code')

mbpp_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(
                    role="HUMAN",
                    prompt=
                    "You are an expert Python programmer, and here is your task: Write a function to find the similar elements from the given two tuple lists. Your code should pass these tests:\n\n assert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5)\n assert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4) \n assert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14) \n"
                ),
                dict(
                    role="BOT",
                    prompt=
                    "[BEGIN]\n 'def similar_elements(test_tup1, test_tup2):\r\n res = tuple(set(test_tup1) & set(test_tup2))\r\n return (res)' \n[DONE] \n\n "
                ),
                dict(
                    role="HUMAN",
                    prompt=
                    "You are an expert Python programmer, and here is your task: Write a python function to identify non-prime numbers. Your code should pass these tests:\n\n assert is_not_prime(2) == False \n assert is_not_prime(10) == True \n assert is_not_prime(35) == True \n"
                ),
                dict(
                    role="BOT",
                    prompt=
                    "[BEGIN]\n 'import math\r\ndef is_not_prime(n):\r\n result = False\r\n for i in range(2,int(math.sqrt(n)) + 1):\r\n if n % i == 0:\r\n result = True\r\n return result' \n[DONE] \n\n "
                ),
                dict(
                    role="HUMAN",
                    prompt=
                    "You are an expert Python programmer, and here is your task: Write a function to find the largest integers from a given list of numbers using heap queue algorithm. Your code should pass these tests:\n\n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35] \n"
                ),
                dict(
                    role="BOT",
                    prompt=
                    "[BEGIN]\n 'import heapq as hq\r\ndef heap_queue_largest(nums,n):\r\n largest_nums = hq.nlargest(n, nums)\r\n return largest_nums' \n[DONE] \n\n "
                ),
                dict(
                    role="HUMAN",
                    prompt=
                    "You are an expert Python programmer, and here is your task: {text} Your code should pass these tests:\n\n {test_list} \n"
                ),
                dict(role="BOT", prompt="[BEGIN]\n"),
            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer))

mbpp_eval_cfg = dict(evaluator=dict(type=MBPPEvaluator), pred_role="BOT")

mbpp_datasets = [
    dict(
        type=MBPPDataset,
        abbr='mbpp',
        path='./data/mbpp/mbpp.jsonl',
        reader_cfg=mbpp_reader_cfg,
        infer_cfg=mbpp_infer_cfg,
        eval_cfg=mbpp_eval_cfg)
]

30 configs/datasets/narrativeqa/narrativeqa_gen_ca4b64.py Normal file
@@ -0,0 +1,30 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import NarrativeQADataset, TriviaQAEvaluator

narrativeqa_reader_cfg = dict(
    input_columns=['question', 'evidence'],
    output_column='answer',
    train_split='valid',
    test_split='valid')

narrativeqa_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template="{evidence}\nAnswer these questions:\nQ: {question}?\nA:"),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(
        type=GenInferencer, max_out_len=50, max_seq_len=8192, batch_size=4))

narrativeqa_eval_cfg = dict(evaluator=dict(type=TriviaQAEvaluator))

narrativeqa_datasets = [
    dict(
        type=NarrativeQADataset,
        abbr='NarrativeQA',
        path='./data/narrativeqa/',
        reader_cfg=narrativeqa_reader_cfg,
        infer_cfg=narrativeqa_infer_cfg,
        eval_cfg=narrativeqa_eval_cfg)
]

4 configs/datasets/obqa/obqa_ppl.py Normal file
@@ -0,0 +1,4 @@
from mmengine.config import read_base

with read_base():
    from .obqa_ppl_2b5b12 import obqa_datasets  # noqa: F401, F403

66 configs/datasets/obqa/obqa_ppl_2b5b12.py Normal file
@@ -0,0 +1,66 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import OBQADataset

_input_columns = [
    ['question_stem', 'A', 'B', 'C', 'D'],
    ['question_stem', 'A', 'B', 'C', 'D', 'fact1'],
]
_template = [
    {
        ans: dict(
            round=[
                dict(
                    role="HUMAN",
                    prompt=
                    "Question: {question_stem}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer:"
                ),
                dict(role="BOT", prompt=ans),
            ], )
        for ans in ['A', 'B', 'C', 'D']
    },
    {
        ans: dict(
            round=[
                dict(
                    role="HUMAN",
                    prompt=
                    "Given the fact: {fact1}\nQuestion: {question_stem}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer:"
                ),
                dict(role="BOT", prompt=ans),
            ], )
        for ans in ['A', 'B', 'C', 'D']
    }
]

obqa_datasets = [
    dict(
        type=OBQADataset,
        path='openbookqa',
        split='test',
    ),
    dict(
        abbr='openbookqa_fact',
        type=OBQADataset,
        path='openbookqa',
        name='additional',
        split='test',
    ),
]
for _i in range(2):
    obqa_reader_cfg = dict(
        input_columns=_input_columns[_i], output_column="answerKey")
    obqa_infer_cfg = dict(
        prompt_template=dict(
            type=PromptTemplate,
            template=_template[_i]),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=PPLInferencer),
    )
    obqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )

    obqa_datasets[_i]["reader_cfg"] = obqa_reader_cfg
    obqa_datasets[_i]["infer_cfg"] = obqa_infer_cfg
    obqa_datasets[_i]["eval_cfg"] = obqa_eval_cfg

52 configs/datasets/obqa/obqa_ppl_3609cc.py Normal file
@@ -0,0 +1,52 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import OBQADataset

_input_columns = [
    ['question_stem', 'A', 'B', 'C', 'D'],
    ['question_stem', 'A', 'B', 'C', 'D', 'fact1'],
]
_template = [{
    'A': "{question_stem} {A}",
    'B': "{question_stem} {B}",
    'C': "{question_stem} {C}",
    'D': "{question_stem} {D}",
}, {
    'A': "Given the fact {fact1}, we know that {question_stem} {A}",
    'B': "Given the fact {fact1}, we know that {question_stem} {B}",
    'C': "Given the fact {fact1}, we know that {question_stem} {C}",
    'D': "Given the fact {fact1}, we know that {question_stem} {D}",
}]

obqa_datasets = [
    dict(
        abbr="openbookqa",
        type=OBQADataset,
        path="openbookqa",
        split="test",
    ),
    dict(
        abbr="openbookqa_fact",
        type=OBQADataset,
        path="openbookqa",
        name="additional",
        split="test",
    ),
]
for _i in range(2):
    obqa_reader_cfg = dict(
        input_columns=_input_columns[_i], output_column="answerKey")
    obqa_infer_cfg = dict(
        prompt_template=dict(
            type=PromptTemplate,
            template=_template[_i]),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=PPLInferencer),
    )
    obqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))

    obqa_datasets[_i]["reader_cfg"] = obqa_reader_cfg
    obqa_datasets[_i]["infer_cfg"] = obqa_infer_cfg
    obqa_datasets[_i]["eval_cfg"] = obqa_eval_cfg

@@ -0,0 +1,36 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import ToxicEvaluator
from opencompass.datasets import RealToxicPromptsDataset

realtoxicprompts_reader_cfg = dict(
    input_columns=['prompt_text'],
    output_column='filename',
    train_split='train',
    test_split='train')

# TODO: allow empty output-column
realtoxicprompts_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(round=[dict(role="HUMAN", prompt="{prompt_text}")])),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer))

# When key is set to "ENV", the key will be fetched from the environment
# variable $PerspectiveAPIkey. Otherwise, set the key here directly.
realtoxicprompts_eval_cfg = dict(
    evaluator=dict(type=ToxicEvaluator, key='ENV'),
    pred_role='BOT',
)

realtoxicprompts_datasets = [
    dict(
        type=RealToxicPromptsDataset,
        path='allenai/real-toxicity-prompts',
        challenging_subset=True,
        reader_cfg=realtoxicprompts_reader_cfg,
        infer_cfg=realtoxicprompts_infer_cfg,
        eval_cfg=realtoxicprompts_eval_cfg)
]

39 configs/datasets/storycloze/storycloze_ppl_c1912d.py Normal file
@@ -0,0 +1,39 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import storyclozeDataset

storycloze_reader_cfg = dict(
    input_columns=['context', 'sentence_quiz1', 'sentence_quiz2'],
    output_column='answer_right_ending',
    train_split='test',
    test_split='test')

storycloze_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template={
            i: dict(round=[
                dict(role="HUMAN", prompt="{context}"),
                dict(role="BOT", prompt=f"{{sentence_quiz{i}}}"),
            ])
            for i in range(1, 3)
        }),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=PPLInferencer))

storycloze_eval_cfg = dict(evaluator=dict(type=AccEvaluator))

# The original story cloze dataset and repo are no longer maintained,
# so the multilingual version of this dataset is used instead.
storycloze_datasets = [
    dict(
        abbr='story_cloze',
        type=storyclozeDataset,
        path='juletxara/xstory_cloze',
        name='en',
        reader_cfg=storycloze_reader_cfg,
        infer_cfg=storycloze_infer_cfg,
        eval_cfg=storycloze_eval_cfg)
]

50
configs/datasets/summedits/summedits_ppl_163352.py
Normal file
50
configs/datasets/summedits/summedits_ppl_163352.py
Normal file
@ -0,0 +1,50 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset

summedits_reader_cfg = dict(
    input_columns=['doc', 'summary'],
    output_column='label',
    test_split='train')

summedits_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template={
            0:
            dict(round=[
                dict(
                    role="HUMAN",
                    prompt=
                    """\nDocument:\n{doc}\nSummary:\n{summary}\nIs the summary factually consistent with the document? """
                ),
                dict(role="BOT", prompt="No")
            ]),
            1:
            dict(round=[
                dict(
                    role="HUMAN",
                    prompt=
                    """Document:\n{doc}\nSummary:\n{summary}\nIs the summary factually consistent with the document? """
                ),
                dict(role="BOT", prompt="Yes")
            ]),
        }),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=PPLInferencer))

summedits_eval_cfg = dict(evaluator=dict(type=AccEvaluator))

summedits_datasets = [
    dict(
        type=HFDataset,
        abbr='summedits',
        path='json',
        split='train',
        data_files='./data/summedits/summedits.jsonl',
        reader_cfg=summedits_reader_cfg,
        infer_cfg=summedits_infer_cfg,
        eval_cfg=summedits_eval_cfg)
]
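Because the dataset is loaded through the generic `HFDataset` JSON loader, `./data/summedits/summedits.jsonl` is expected to hold one record per line with at least the `doc`, `summary` and `label` fields referenced above. A hypothetical record, just to show the shape (field values invented):

```python
import json

# Hypothetical SummEdits-style record for illustration only.
record = {
    'doc': 'The company reported a 10% rise in quarterly revenue.',
    'summary': 'Quarterly revenue fell by 10%.',
    'label': 0,  # per the template above: 0 -> "No" (inconsistent), 1 -> "Yes"
}
print(json.dumps(record))
```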
35
configs/datasets/summscreen/summscreen_gen_997ee2.py
Normal file
@ -0,0 +1,35 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import BleuEvaluator
from opencompass.datasets import SummScreenDataset

summscreen_reader_cfg = dict(
    input_columns='content',
    output_column='summary',
    train_split='dev',
    test_split='dev')

summscreen_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=
        "Please summarize the following English report in English:{content}\n{summary}."),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(
        type=GenInferencer, batch_size=4, max_out_len=500, max_seq_len=8192))

summscreen_eval_cfg = dict(
    evaluator=dict(type=BleuEvaluator),
    pred_postprocessor=dict(type='general_cn'),
    dataset_postprocessor=dict(type='general_cn'))

summscreen_datasets = [
    dict(
        type=SummScreenDataset,
        path='./data/SummScreen/',
        abbr='SummScreen',
        reader_cfg=summscreen_reader_cfg,
        infer_cfg=summscreen_infer_cfg,
        eval_cfg=summscreen_eval_cfg)
]
4
configs/datasets/winograd/winograd_ppl.py
Normal file
@ -0,0 +1,4 @@
from mmengine.config import read_base

with read_base():
    from .winograd_ppl_c1c427 import winograd_datasets  # noqa: F401, F403
28
configs/datasets/z_bench/z_bench_gen_61db0a.py
Normal file
@ -0,0 +1,28 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset

z_bench_reader_cfg = dict(
    ds_size=4,
    input_columns=['text'],
    output_column='category',
    train_split='test')

z_bench_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(round=[dict(role="HUMAN", prompt="{text}")]),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer))

z_bench_dataset = dict(
    type=HFDataset,
    path=
    '/mnt/petrelfs/gaotong/llm_eval/openagieval_dataset/eval_datasets/z_bench',
    data_dir=
    '/mnt/petrelfs/gaotong/llm_eval/openagieval_dataset/eval_datasets/z_bench',
    name='question',
    reader_cfg=z_bench_reader_cfg,
    infer_cfg=z_bench_infer_cfg)
20
docs/en/Makefile
Normal file
@ -0,0 +1,20 @@
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS    ?=
SPHINXBUILD   ?= sphinx-build
SOURCEDIR     = .
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
1
docs/en/prompt/prompt_template.md
Normal file
@ -0,0 +1 @@
# Prompt Template
1
docs/en/user_guides/models.md
Normal file
@ -0,0 +1 @@
# Prepare Models
62
docs/zh_cn/_static/css/readthedocs.css
Normal file
@ -0,0 +1,62 @@
.header-logo {
  background-image: url("../image/logo.png");
  background-size: 183px 50px;
  height: 50px;
  width: 183px;
}

@media screen and (min-width: 1100px) {
  .header-logo {
    top: -12px;
  }
}

pre {
  white-space: pre;
}

@media screen and (min-width: 2000px) {
  .pytorch-content-left {
    width: 1200px;
    margin-left: 30px;
  }
  article.pytorch-article {
    max-width: 1200px;
  }
  .pytorch-breadcrumbs-wrapper {
    width: 1200px;
  }
  .pytorch-right-menu.scrolling-fixed {
    position: fixed;
    top: 45px;
    left: 1580px;
  }
}


article.pytorch-article section code {
  padding: .2em .4em;
  background-color: #f3f4f7;
  border-radius: 5px;
}

/* Disable the change in tables */
article.pytorch-article section table code {
  padding: unset;
  background-color: unset;
  border-radius: unset;
}

table.autosummary td {
  width: 50%
}

img.align-center {
  display: block;
  margin-left: auto;
  margin-right: auto;
}

article.pytorch-article p.rubric {
  font-weight: bold;
}
BIN
docs/zh_cn/_static/image/logo.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 12 KiB
14
docs/zh_cn/_templates/callable.rst
Normal file
@ -0,0 +1,14 @@
.. role:: hidden
    :class: hidden-section
.. currentmodule:: {{ module }}


{{ name | underline}}

.. autoclass:: {{ name }}
    :members:
    :special-members: __call__

..
    autogenerated from _templates/callable.rst
    note it does not have :inherited-members:
67
docs/zh_cn/notes/contribution_guide.md
Normal file
@ -0,0 +1,67 @@
# Contributing to OpenCompass

- [Contributing to OpenCompass](#contributing-to-opencompass)
  - [Workflow](#workflow)
  - [Code Style](#code-style)
    - [Python](#python)
  - [Pre-commit Hook](#pre-commit-hook)

Thanks for your interest in contributing to OpenCompass! All kinds of contributions are welcome, including but not limited to the following.

- Fix typos or bugs
- Add documentation or translate the documentation into other languages
- Add new features and components

## Workflow

We recommend that potential contributors follow this workflow.

1. Fork and pull the latest OpenCompass repository, then follow [Get Started](https://OpenCompass.readthedocs.io/en/latest/get_started.html) to set up the environment.
2. Check out a new branch (**do not use the master or dev branch to create a PR**).

   ```bash
   git checkout -b xxxx # xxxx is the name of the new branch
   ```

3. Edit the related files and follow the code style described below.
4. Use the [pre-commit hook](https://pre-commit.com/) to check and format your changes.
5. Commit your changes.
6. Create a PR with the related information.

## Code Style

### Python

We adopt [PEP8](https://www.python.org/dev/peps/pep-0008/) as the preferred code style.

We use the following tools for linting and formatting:

- [flake8](https://github.com/PyCQA/flake8): A wrapper around several linter tools.
- [isort](https://github.com/timothycrosley/isort): A utility for sorting Python imports.
- [yapf](https://github.com/google/yapf): A formatter for Python files.
- [codespell](https://github.com/codespell-project/codespell): A Python utility for fixing common misspellings in text files.
- [mdformat](https://github.com/executablebooks/mdformat): An opinionated Markdown formatter that can be used to enforce a consistent style in Markdown files.
- [docformatter](https://github.com/myint/docformatter): A tool for formatting docstrings.

Style configurations for yapf and isort can be found in [setup.cfg](https://github.com/OpenCompass/blob/main/setup.cfg).

## Pre-commit Hook

We use the [pre-commit hook](https://pre-commit.com/) to automatically check and format `flake8`, `yapf`, `isort`, `trailing whitespaces` and `markdown files` on every commit,
fix `end-of-files`, `double-quoted-strings`, `python-encoding-pragma` and `mixed-line-ending`, and automatically sort `requirements.txt`. The pre-commit hook configuration is stored in [.pre-commit-config]().

After you clone the repository, you need to install and initialize the pre-commit hook.

```shell
pip install -U pre-commit
```

Then run from the repository folder:

```shell
pre-commit install
```

After this, the code linters and formatters will be enforced on every commit.

> Make sure your code passes the lint checks and is formatted by yapf before you create a PR.
45
opencompass/datasets/arc.py
Normal file
@ -0,0 +1,45 @@
import json

from datasets import Dataset

from opencompass.registry import LOAD_DATASET

from .base import BaseDataset


@LOAD_DATASET.register_module()
class ARCDataset(BaseDataset):

    @staticmethod
    def load(path: str):
        with open(path, 'r', errors='ignore') as in_f:
            rows = []
            for i, line in enumerate(in_f):
                sample = json.loads(line.strip())
                answerKey = sample['answerKey']
                sample = sample['question']
                question = sample['stem']
                choices = sample['choices']
                if len(choices) != 4:
                    continue
                textA = choices[0]['text']
                textB = choices[1]['text']
                textC = choices[2]['text']
                textD = choices[3]['text']
                rows.append({
                    'question': question,
                    'answerKey': answerKey,
                    'textA': textA,
                    'textB': textB,
                    'textC': textC,
                    'textD': textD
                })
            dataset = Dataset.from_dict({
                'question': [row['question'] for row in rows],
                'answerKey': [row['answerKey'] for row in rows],
                'textA': [row['textA'] for row in rows],
                'textB': [row['textB'] for row in rows],
                'textC': [row['textC'] for row in rows],
                'textD': [row['textD'] for row in rows]
            })
            return dataset
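`ARCDataset.load` expects a JSON-Lines file in which every line carries an `answerKey` plus a `question` object with a `stem` and exactly four `choices`; lines with a different number of choices are skipped. A quick round-trip with a single made-up line (the question and file are hypothetical):

```python
import json
import tempfile

# One hypothetical ARC-style line, invented for illustration.
line = {
    'answerKey': 'B',
    'question': {
        'stem': 'Which unit is used to measure force?',
        'choices': [{'text': 'metre'}, {'text': 'newton'},
                    {'text': 'second'}, {'text': 'kilogram'}],
    },
}
with tempfile.NamedTemporaryFile('w', suffix='.jsonl', delete=False) as f:
    f.write(json.dumps(line) + '\n')

# ARCDataset.load(f.name) would then yield one row with the columns
# question, answerKey and textA..textD.
```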
36
opencompass/datasets/flores.py
Normal file
@ -0,0 +1,36 @@
import re

from datasets import DatasetDict, load_dataset

from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS

from .base import BaseDataset


@LOAD_DATASET.register_module()
class FloresFirst100Dataset(BaseDataset):

    @staticmethod
    def load(name):
        return DatasetDict({
            'dev':
            load_dataset(path='facebook/flores', name=name, split='dev'),
            'devtest':
            load_dataset(
                path='facebook/flores', name=name, split='devtest[:100]')
        })


@TEXT_POSTPROCESSORS.register_module('flores')
def flores_postprocess(text: str) -> str:
    text = text.strip().split('\n')[0]
    return text


@TEXT_POSTPROCESSORS.register_module('flores-chinese')
def flores_postprocess_chinese(text: str) -> str:
    import jieba
    truncated_text = text.strip().split('\n')[0]
    cleaned_text = re.sub(r'\s+', ' ', truncated_text).strip()
    cleaned_text = ' '.join(jieba.cut(cleaned_text))
    return cleaned_text
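The two post-processors only differ in that the Chinese variant re-segments the first output line with jieba so that BLEU can be computed over space-separated tokens. A small sketch of the effect (requires `jieba`; the sample sentence is made up and the segmented output is indicative only):

```python
import re

import jieba  # third-party segmenter used by flores_postprocess_chinese

text = '今天天气很好。\n(any extra lines from the model are dropped)'
truncated = text.strip().split('\n')[0]
cleaned = re.sub(r'\s+', ' ', truncated).strip()
print(' '.join(jieba.cut(cleaned)))  # e.g. '今天 天气 很 好 。'
```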
43
opencompass/datasets/qasper.py
Normal file
@ -0,0 +1,43 @@
from datasets import Dataset, DatasetDict

from opencompass.registry import LOAD_DATASET

from .base import BaseDataset


@LOAD_DATASET.register_module()
class QASPERDataset(BaseDataset):

    @staticmethod
    def load(path: str):
        import json
        import os
        dataset_dict = DatasetDict()
        split = 'dev'
        dev_list = []

        dev = os.path.join(path, 'qasper-dev-v0.3.json')
        with open(dev, 'r') as f:
            dev_json = json.load(f)

        for article_id in dev_json.keys():
            full_article = '\n'.join([
                (x['section_name'] if x['section_name'] else '') + '\n' +
                '\n'.join(x['paragraphs']) + '\n'
                for x in dev_json[article_id]['full_text']
            ])
            for qa in dev_json[article_id]['qas']:
                question = qa['question']
                answers = []
                for x in qa['answers']:
                    answers.extend(x['answer']['extractive_spans'])
                if answers:
                    dev_list.append({
                        'answer': answers,
                        'question': question,
                        'evidence': full_article,
                    })
                else:
                    continue
        dataset_dict[split] = Dataset.from_list(dev_list)
        return dataset_dict
53
opencompass/datasets/qaspercut.py
Normal file
@ -0,0 +1,53 @@
from datasets import Dataset, DatasetDict

from opencompass.registry import LOAD_DATASET

from .base import BaseDataset


@LOAD_DATASET.register_module()
class QASPERCUTDataset(BaseDataset):

    @staticmethod
    def load(path: str):
        import json
        import os
        dataset_dict = DatasetDict()
        split = 'dev'
        dev_list = []

        dev = os.path.join(path, 'qasper-dev-v0.3.json')
        with open(dev, 'r') as f:
            dev_json = json.load(f)

        for article_id in dev_json.keys():
            full_article = '\n'.join([
                (x['section_name'] if x['section_name'] else '') + '\n' +
                '\n'.join(x['paragraphs']) + '\n'
                for x in dev_json[article_id]['full_text']
            ])
            for qa in dev_json[article_id]['qas']:
                question = qa['question']
                answers = []
                clues = []
                for x in qa['answers']:
                    answers.extend(x['answer']['extractive_spans'])
                    clues.extend(x['answer']['evidence'])

                evis = [full_article.find(clue)
                        for clue in clues] + [100000000]
                evi = min(evis)
                if evi == -1 or evi == 100000000:
                    evi = 0

                if answers:
                    dev_list.append({
                        'answer': answers,
                        'question': question,
                        'evidence': full_article[evi:],
                    })
                else:
                    continue

        dataset_dict[split] = Dataset.from_list(dev_list)
        return dataset_dict
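Compared with `QASPERDataset`, the `CUT` variant keeps only the article text starting from the earliest annotated evidence passage: it takes the minimum `str.find` position over all clues and falls back to 0 when no clue is located. A toy illustration of that slicing rule (article and clues invented):

```python
# Toy illustration of the evidence-truncation rule used above.
full_article = 'Intro section.\nMethod section with the key finding.\nAppendix.'
clues = ['Method section', 'key finding']

evis = [full_article.find(clue) for clue in clues] + [100000000]
evi = min(evis)
if evi == -1 or evi == 100000000:
    evi = 0
print(full_article[evi:])  # article text starting at the first clue
```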
23
opencompass/datasets/safety.py
Normal file
@ -0,0 +1,23 @@
from datasets import Dataset, DatasetDict

from opencompass.registry import LOAD_DATASET

from .base import BaseDataset


@LOAD_DATASET.register_module()
class SafetyDataset(BaseDataset):

    @staticmethod
    def load(path):
        dataset = DatasetDict()

        data_list = list()
        idx = 0
        with open(path, 'r') as f:
            for line in f:
                if line.strip():
                    data_list.append({'idx': idx, 'prompt': line.strip()})
                    idx += 1

        dataset['test'] = Dataset.from_list(data_list)
        return dataset
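`SafetyDataset` reads a plain-text file with one prompt per non-empty line and turns it into a `test` split with `idx`/`prompt` columns. A small sketch of the resulting records (the prompts are invented and the printed summary is approximate):

```python
from datasets import Dataset

# Hypothetical prompt-file contents, one prompt per line; blank lines are skipped.
lines = ['How do I stay safe online?', '', 'Describe a phishing email.']
data_list = [{'idx': i, 'prompt': s.strip()}
             for i, s in enumerate(l for l in lines if l.strip())]
print(Dataset.from_list(data_list))
# roughly: Dataset({features: ['idx', 'prompt'], num_rows: 2})
```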
58
opencompass/datasets/triviaqarc.py
Normal file
@ -0,0 +1,58 @@
from datasets import Dataset, DatasetDict

from opencompass.registry import LOAD_DATASET

from .base import BaseDataset


@LOAD_DATASET.register_module()
class TriviaQArcDataset(BaseDataset):

    @staticmethod
    def load(path: str):
        import json
        import os
        dataset_dict = DatasetDict()
        split = 'dev'
        dev_list = []

        web_dev = os.path.join(path, 'qa', 'verified-web-dev.json')
        with open(web_dev, 'r') as f:
            web_dev_json = json.load(f)

        for x in web_dev_json['Data']:
            cand_answers = x['Answer']['Aliases'] + x['Answer']['HumanAnswers']
            question = x['Question']
            evidence = ''
            if x['SearchResults']:
                x_path = os.path.join(path, 'evidence', 'web',
                                      x['SearchResults'][0]['Filename'])
                with open(x_path, 'r') as f:
                    evidence = f.read(100000)
            dev_list.append({
                'answer': cand_answers,
                'question': question,
                'evidence': evidence,
            })

        wiki_dev = os.path.join(path, 'qa', 'verified-wikipedia-dev.json')
        with open(wiki_dev, 'r') as f:
            wiki_dev_json = json.load(f)

        for x in wiki_dev_json['Data']:
            cand_answers = x['Answer']['Aliases']
            question = x['Question']
            evidence = ''
            if x['EntityPages']:
                x_path = os.path.join(path, 'evidence', 'wikipedia',
                                      x['EntityPages'][0]['Filename'])
                with open(x_path, 'r') as f:
                    evidence = f.read(100000)
            dev_list.append({
                'answer': cand_answers,
                'question': question,
                'evidence': evidence,
            })

        dataset_dict[split] = Dataset.from_list(dev_list)
        return dataset_dict
44
opencompass/datasets/winogrande.py
Normal file
@ -0,0 +1,44 @@
from datasets import load_dataset

from opencompass.registry import LOAD_DATASET

from .base import BaseDataset


@LOAD_DATASET.register_module()
class winograndeDataset(BaseDataset):

    @staticmethod
    def load(**kwargs):

        dataset = load_dataset(**kwargs)

        def preprocess(example):
            prompt = example.pop('sentence')
            example['opt1'] = prompt.replace('_', example.pop('option1'))
            example['opt2'] = prompt.replace('_', example.pop('option2'))
            return example

        return dataset.map(preprocess)


@LOAD_DATASET.register_module()
class winograndeDataset_V2(BaseDataset):

    @staticmethod
    def load(**kwargs):

        dataset = load_dataset(**kwargs)

        def preprocess(example):
            prompt = example.pop('sentence')
            example['opt1'] = prompt.replace('_', example.pop('option1'))
            example['opt2'] = prompt.replace('_', example.pop('option2'))
            answer = example.pop('answer')
            if answer == '':
                example['label'] = 'NULL'
            else:
                example['label'] = ' AB'[int(answer)]
            return example

        return dataset.map(preprocess)
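The `preprocess` hook turns each Winogrande sentence into two fully substituted options, and the V2 loader additionally maps the `answer` field ('1' or '2') onto an `A`/`B` label. A quick sketch on one made-up example:

```python
# Made-up example showing what the V2 preprocess produces.
example = {
    'sentence': 'The trophy does not fit in the suitcase because _ is too big.',
    'option1': 'the trophy',
    'option2': 'the suitcase',
    'answer': '1',
}
prompt = example.pop('sentence')
example['opt1'] = prompt.replace('_', example.pop('option1'))
example['opt2'] = prompt.replace('_', example.pop('option2'))
example['label'] = ' AB'[int(example.pop('answer'))]  # '1' -> 'A', '2' -> 'B'
print(example)
```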
29
opencompass/datasets/xcopa.py
Normal file
@ -0,0 +1,29 @@
from datasets import concatenate_datasets, load_dataset

from opencompass.registry import LOAD_DATASET

from .base import BaseDataset


@LOAD_DATASET.register_module()
class XCOPADataset(BaseDataset):

    @staticmethod
    def load(**kwargs):
        path = kwargs.get('path', None)
        lans = [
            'et', 'ht', 'it', 'id', 'qu', 'sw', 'zh', 'ta', 'th', 'tr', 'vi',
            'translation-et', 'translation-ht', 'translation-it',
            'translation-id', 'translation-sw', 'translation-zh',
            'translation-ta', 'translation-th', 'translation-tr',
            'translation-vi'
        ]

        datasets = []
        for lan in lans:
            dataset = load_dataset(path, lan)['validation']
            datasets.append(dataset)

        combined_dataset = concatenate_datasets(datasets)

        return combined_dataset
41
opencompass/openicl/icl_evaluator/icl_aucroc_evaluator.py
Normal file
@ -0,0 +1,41 @@
from typing import List
import numpy as np
from sklearn.metrics import roc_auc_score

from opencompass.registry import ICL_EVALUATORS

from .icl_base_evaluator import BaseEvaluator


@ICL_EVALUATORS.register_module()
class AUCROCEvaluator(BaseEvaluator):
    """Calculate AUC-ROC scores and accuracy according to the predictions.

    For some datasets, accuracy cannot reveal the difference between
    models because of saturation. AUC-ROC scores can further examine a
    model's ability to distinguish between different labels. More details can be found at
    https://scikit-learn.org/stable/modules/generated/sklearn.metrics.roc_auc_score.html
    """  # noqa

    def __init__(self) -> None:
        super().__init__()

    def score(self, predictions: List, references: List) -> dict:
        """Calculate scores and accuracy.

        Args:
            predictions (List): List of probabilities for each class of each
                sample.
            references (List): List of target labels for each sample.

        Returns:
            dict: calculated scores.
        """
        if len(predictions) != len(references):
            return {
                'error': 'predictions and references have different length.'
            }
        auc_score = roc_auc_score(references, np.array(predictions)[:, 1])
        accuracy = sum(
            references == np.argmax(predictions, axis=1)) / len(references)
        return dict(auc_score=auc_score * 100, accuracy=accuracy * 100)
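`score` expects per-class probabilities for a binary task, so column 1 is treated as the positive-class score. A small self-contained check using the same calls as above (the numbers are arbitrary):

```python
import numpy as np
from sklearn.metrics import roc_auc_score

predictions = [[0.9, 0.1], [0.3, 0.7], [0.6, 0.4], [0.2, 0.8]]  # arbitrary
references = [0, 1, 1, 1]

auc = roc_auc_score(references, np.array(predictions)[:, 1])
acc = sum(references == np.argmax(predictions, axis=1)) / len(references)
print(auc * 100, acc * 100)  # 100.0 and 75.0 for these toy numbers
```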
4
opencompass/openicl/icl_inferencer/__init__.py
Normal file
@ -0,0 +1,4 @@
from .icl_base_inferencer import BaseInferencer
from .icl_gen_inferencer import GenInferencer
from .icl_ppl_inferencer import PPLInferencer
from .icl_clp_inferencer import CLPInferencer
74
opencompass/openicl/icl_retriever/icl_bm25_retriever.py
Normal file
@ -0,0 +1,74 @@
"""BM25 Retriever."""

from typing import List, Optional

import numpy as np
from nltk.tokenize import word_tokenize
from rank_bm25 import BM25Okapi
from tqdm import trange

from opencompass.openicl.icl_retriever import BaseRetriever
from opencompass.openicl.utils.logging import get_logger
from opencompass.registry import ICL_RETRIEVERS

logger = get_logger(__name__)


@ICL_RETRIEVERS.register_module()
class BM25Retriever(BaseRetriever):
    """BM25 Retriever. In information retrieval, Okapi BM25 (BM is an
    abbreviation of best matching) is a ranking function used by search engines
    to estimate the relevance of documents to a given search query. You can
    find more details in https://en.wikipedia.org/wiki/Okapi_BM25. Each in-
    context example of the test prompts is retrieved by the BM25 Algorithm.

    Args:
        dataset (`BaseDataset`): Any BaseDataset instances.
            Attributes of ``reader``, ``train`` and ``test`` will be used.
        ice_separator (`Optional[str]`): The separator between each in-context
            example template when origin `PromptTemplate` is provided. Defaults
            to '\n'.
        ice_eos_token (`Optional[str]`): The end of sentence token for
            in-context example template when origin `PromptTemplate` is
            provided. Defaults to '\n'.
        ice_num (`Optional[int]`): The number of in-context example template
            when origin `PromptTemplate` is provided. Defaults to 1.
        index_split (`Optional[str]`): The split of the dataset to retrieve the
            in-context example index, used when `dataset_reader.dataset` is an
            instance of `datasets.Dataset`. Defaults to 'train'.
        test_split (`Optional[str]`): The split of the dataset to retrieve the
            in-context example, used when `dataset_reader.dataset` is an
            instance of `datasets.Dataset`. Defaults to 'test'.
    """
    bm25 = None
    index_corpus = None
    test_corpus = None

    def __init__(self,
                 dataset,
                 ice_separator: Optional[str] = '\n',
                 ice_eos_token: Optional[str] = '\n',
                 ice_num: Optional[int] = 1) -> None:
        super().__init__(dataset, ice_separator, ice_eos_token, ice_num)
        self.index_corpus = [
            word_tokenize(data) for data in
            self.dataset_reader.generate_input_field_corpus(self.index_ds)
        ]
        self.bm25 = BM25Okapi(self.index_corpus)
        self.test_corpus = [
            word_tokenize(data) for data in
            self.dataset_reader.generate_input_field_corpus(self.test_ds)
        ]

    def retrieve(self) -> List[List]:
        """Retrieve the in-context example index for each test example."""
        rtr_idx_list = []
        logger.info('Retrieving data for test set...')
        for idx in trange(len(self.test_corpus),
                          disable=not self.is_main_process):
            query = self.test_corpus[idx]
            scores = self.bm25.get_scores(query)
            near_ids = list(np.argsort(scores)[::-1][:self.ice_num])
            near_ids = [int(a) for a in near_ids]
            rtr_idx_list.append(near_ids)
        return rtr_idx_list
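The ranking itself is delegated to `rank_bm25.BM25Okapi`: the index split is tokenized into a corpus, each tokenized test query is scored against it, and the top `ice_num` indices are kept. A standalone sketch of that ranking step on a toy corpus (requires `rank_bm25` and `nltk` with the `punkt` tokenizer data; sentences are invented):

```python
import numpy as np
from nltk.tokenize import word_tokenize
from rank_bm25 import BM25Okapi

# Toy in-context example pool (invented sentences).
index_corpus = [word_tokenize(s) for s in [
    'The cat sat on the mat.',
    'Stock prices rose sharply today.',
    'The dog chased the cat.',
]]
bm25 = BM25Okapi(index_corpus)

query = word_tokenize('Which animal chased the cat?')
scores = bm25.get_scores(query)
ice_num = 1
near_ids = [int(i) for i in np.argsort(scores)[::-1][:ice_num]]
print(near_ids)  # indices of the most relevant pool entries, e.g. [2]
```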
40
opencompass/openicl/icl_retriever/icl_random_retriever.py
Normal file
@ -0,0 +1,40 @@
"""Random Retriever."""

from typing import Optional

import numpy as np
from tqdm import trange

from opencompass.openicl.icl_retriever import BaseRetriever
from opencompass.openicl.utils.logging import get_logger

logger = get_logger(__name__)


class RandomRetriever(BaseRetriever):
    """Random Retriever. Each in-context example of the test prompts is
    retrieved in a random way.

    **WARNING**: This class has not been tested thoroughly. Please use it with
    caution.
    """

    def __init__(self,
                 dataset,
                 ice_separator: Optional[str] = '\n',
                 ice_eos_token: Optional[str] = '\n',
                 ice_num: Optional[int] = 1,
                 seed: Optional[int] = 43) -> None:
        super().__init__(dataset, ice_separator, ice_eos_token, ice_num)
        self.seed = seed

    def retrieve(self):
        np.random.seed(self.seed)
        num_idx = len(self.index_ds)
        rtr_idx_list = []
        logger.info('Retrieving data for test set...')
        for _ in trange(len(self.test_ds), disable=not self.is_main_process):
            idx_list = np.random.choice(num_idx, self.ice_num,
                                        replace=False).tolist()
            rtr_idx_list.append(idx_list)
        return rtr_idx_list
26
opencompass/openicl/icl_retriever/icl_zero_retriever.py
Normal file
@ -0,0 +1,26 @@
"""Zeroshot Retriever."""

from typing import List, Optional

from opencompass.openicl.icl_retriever import BaseRetriever
from opencompass.registry import ICL_RETRIEVERS


@ICL_RETRIEVERS.register_module()
class ZeroRetriever(BaseRetriever):
    """Zeroshot Retriever. The retriever returns empty list for all queries.

    Args:
        dataset (`BaseDataset`): Any BaseDataset instances.
            Attributes of ``reader``, ``train`` and ``test`` will be used.
        ice_eos_token (`Optional[str]`): The end of sentence token for
            in-context example template when origin `PromptTemplate` is
            provided. Defaults to ''.
    """

    def __init__(self, dataset, ice_eos_token: Optional[str] = '') -> None:
        super().__init__(dataset, '', ice_eos_token, 0)

    def retrieve(self) -> List[List]:
        rtr_idx_list = [[] for _ in range(len(self.test_ds))]
        return rtr_idx_list
1
opencompass/openicl/utils/__init__.py
Normal file
@ -0,0 +1 @@
from .logging import *
13
opencompass/utils/logging.py
Normal file
@ -0,0 +1,13 @@
from mmengine.logging import MMLogger


def get_logger(log_level='INFO') -> MMLogger:
    """Get the logger for OpenCompass.

    Args:
        log_level (str): The log level. Default: 'INFO'. Choices are 'DEBUG',
            'INFO', 'WARNING', 'ERROR', 'CRITICAL'.
    """
    return MMLogger.get_instance('OpenCompass',
                                 logger_name='OpenCompass',
                                 log_level=log_level)
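A minimal usage sketch, assuming OpenCompass and mmengine are installed and `opencompass.utils` is importable as a package: every call returns the same named `MMLogger` instance, which behaves like a standard Python logger.

```python
from opencompass.utils.logging import get_logger  # import path assumed from the file above

logger = get_logger(log_level='DEBUG')
logger.info('Evaluation started')  # MMLogger exposes the usual logging methods
```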