Add Release Contribution

Ezra-Yu 2023-07-05 02:22:40 +00:00 committed by gaotong
parent 36f111100f
commit cbe9fe2cdb
65 changed files with 2266 additions and 0 deletions

69
.pre-commit-config.yaml Normal file
View File

@ -0,0 +1,69 @@
exclude: |
(?x)^(
tests/data/|
opencompass/models/internal/|
opencompass/utils/internal/|
configs/
)
repos:
- repo: https://github.com/PyCQA/flake8
rev: 5.0.4
hooks:
- id: flake8
- repo: https://github.com/PyCQA/isort
rev: 5.11.5
hooks:
- id: isort
- repo: https://github.com/pre-commit/mirrors-yapf
rev: v0.32.0
hooks:
- id: yapf
- repo: https://github.com/codespell-project/codespell
rev: v2.2.1
hooks:
- id: codespell
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
hooks:
- id: trailing-whitespace
exclude: |
(?x)^(
dicts/|
projects/.*?/dicts/
)
- id: check-yaml
- id: end-of-file-fixer
exclude: |
(?x)^(
dicts/|
projects/.*?/dicts/
)
- id: requirements-txt-fixer
- id: double-quote-string-fixer
- id: check-merge-conflict
- id: fix-encoding-pragma
args: ["--remove"]
- id: mixed-line-ending
args: ["--fix=lf"]
- id: mixed-line-ending
args: ["--fix=lf"]
- repo: https://github.com/executablebooks/mdformat
rev: 0.7.9
hooks:
- id: mdformat
args: ["--number", "--table-width", "200"]
additional_dependencies:
- mdformat-openmmlab
- mdformat_frontmatter
- linkify-it-py
- repo: https://github.com/myint/docformatter
rev: v1.3.1
hooks:
- id: docformatter
args: ["--in-place", "--wrap-descriptions", "79"]
# - repo: https://github.com/open-mmlab/pre-commit-hooks
# rev: v0.2.0 # Use the ref you want to point at
# hooks:
# - id: check-algo-readme
# - id: check-copyright
# args: ["mmocr", "tests", "tools"] # these directories will be checked

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .ARC_e_ppl_f86898 import ARC_e_datasets # noqa: F401, F403

View File

@ -0,0 +1,37 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import C3Dataset
C3_reader_cfg = dict(
input_columns=[
'question', 'content', 'choice0', 'choice1', 'choice2', 'choice3',
'choices'
],
output_column='label')
C3_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
i: dict(round=[
dict(role="HUMAN", prompt="文章:{content}\n问题:{question}"),
dict(role="BOT", prompt=f"答案:{{choice{i}}}")
])
for i in range(4)
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
C3_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
C3_datasets = [
dict(
type=C3Dataset,
abbr='C3',
path='./data/CLUE/C3/dev_0.json',
reader_cfg=C3_reader_cfg,
infer_cfg=C3_infer_cfg,
eval_cfg=C3_eval_cfg)
]
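For readers less familiar with the per-choice PPL setup above: the dict comprehension expands into one two-turn prompt per answer index, and (assuming the standard perplexity-ranking behaviour of `PPLInferencer`) the candidate whose completion is most likely is taken as the prediction. A minimal, self-contained sketch of that expansion, with no OpenCompass imports:

```python
# How the C3 PPL template above expands: one candidate prompt per choice index.
template = {
    i: dict(round=[
        dict(role="HUMAN", prompt="文章:{content}\n问题:{question}"),
        dict(role="BOT", prompt=f"答案:{{choice{i}}}"),
    ])
    for i in range(4)
}
print(template[0]["round"][1]["prompt"])  # -> 答案:{choice0}
```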

View File

@ -0,0 +1,34 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator
from opencompass.datasets import CMRCDataset
CMRC_reader_cfg = dict(
input_columns=['question', 'context'], output_column='answers')
CMRC_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt="文章:{context}\n根据上文,回答如下问题:\n{question}\n答:"),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
CMRC_eval_cfg = dict(
evaluator=dict(type=EMEvaluator),
pred_role="BOT",
)
CMRC_datasets = [
dict(
type=CMRCDataset,
abbr='CMRC_dev',
path='./data/CLUE/CMRC/dev.json',
reader_cfg=CMRC_reader_cfg,
infer_cfg=CMRC_infer_cfg,
eval_cfg=CMRC_eval_cfg),
]

View File

@ -0,0 +1,33 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator
from opencompass.datasets import CMRCDataset
CMRC_reader_cfg = dict(
input_columns=['question', 'context'], output_column='answers')
CMRC_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(role="HUMAN", prompt="文章:{context}\n根据上文,回答如下问题:{question}"),
dict(role="BOT", prompt="答:"),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
CMRC_eval_cfg = dict(
evaluator=dict(type=EMEvaluator),
pred_role="BOT",
)
CMRC_datasets = [
dict(
type=CMRCDataset,
abbr='CMRC_dev',
path='./data/CLUE/CMRC/dev.json',
reader_cfg=CMRC_reader_cfg,
infer_cfg=CMRC_infer_cfg,
eval_cfg=CMRC_eval_cfg),
]

View File

@ -0,0 +1,34 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator
from opencompass.datasets import DRCDDataset
DRCD_reader_cfg = dict(
input_columns=['question', 'context'], output_column='answers')
DRCD_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt="文章:{context}\n根据上文,回答如下问题:\n{question}\n答:"),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
DRCD_eval_cfg = dict(
evaluator=dict(type=EMEvaluator),
pred_role="BOT",
)
DRCD_datasets = [
dict(
type=DRCDDataset,
abbr='DRCD_dev',
path='./data/CLUE/DRCD/dev.json',
reader_cfg=DRCD_reader_cfg,
infer_cfg=DRCD_infer_cfg,
eval_cfg=DRCD_eval_cfg),
]

View File

@ -0,0 +1,50 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import AFQMCDataset_V2
bustm_reader_cfg = dict(
input_columns=["sentence1", "sentence2"],
output_column="label",
test_split="train")
bustm_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
"语句一:“{sentence1}\n语句二:“{sentence2}\n请判断语句一和语句二说的是否是一个意思?\nA. 无关\nB. 相关\n请从“A”“B”中进行选择。\n答:",
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
bustm_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
bustm_datasets = [
dict(
abbr="bustm-dev",
type=AFQMCDataset_V2, # bustm share the same format with AFQMC
path="./data/FewCLUE/bustm/dev_few_all.json",
reader_cfg=bustm_reader_cfg,
infer_cfg=bustm_infer_cfg,
eval_cfg=bustm_eval_cfg,
),
dict(
abbr="bustm-test",
type=AFQMCDataset_V2, # bustm share the same format with AFQMC
path="./data/FewCLUE/bustm/test_public.json",
reader_cfg=bustm_reader_cfg,
infer_cfg=bustm_infer_cfg,
eval_cfg=bustm_eval_cfg,
),
]

View File

@ -0,0 +1,45 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CHIDDataset
chid_reader_cfg = dict(
input_columns=[f'content{i}' for i in range(7)], output_column='answer')
chid_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
i: dict(
round=[
dict(role="HUMAN", prompt=f"以下句子是否通顺?\n{{content{i}}}"),
dict(role="BOT", prompt="这个句子是通顺的。"),
], )
for i in range(7)
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
chid_eval_cfg = dict(evaluator=dict(type=AccEvaluator), pred_role="BOT")
chid_datasets = [
dict(
type=CHIDDataset,
path='json',
abbr='chid-dev',
data_files='./data/FewCLUE/chid/dev_few_all.json',
split='train',
reader_cfg=chid_reader_cfg,
infer_cfg=chid_infer_cfg,
eval_cfg=chid_eval_cfg),
dict(
type=CHIDDataset,
path='json',
abbr='chid-test',
data_files='./data/FewCLUE/chid/test_public.json',
split='train',
reader_cfg=chid_reader_cfg,
infer_cfg=chid_infer_cfg,
eval_cfg=chid_eval_cfg),
]

View File

@ -0,0 +1,58 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CluewscDataset
cluewsc_reader_cfg = dict(
input_columns=['span1', 'span2', 'text', 'new_text'],
output_column='answer')
cluewsc_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0:
dict(round=[
dict(
role="HUMAN",
prompt=
"{text}\nHere, is the pronoun \"{span2}\" used to mean \"{span1}\"?"
),
dict(role="BOT", prompt="No.")
]),
1:
dict(round=[
dict(
role="HUMAN",
prompt=
"{text}\nHere, is the pronoun \"{span2}\" used to mean \"{span1}\"?"
),
dict(role="BOT", prompt="Yes.")
]),
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
cluewsc_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
cluewsc_datasets = [
dict(
type=CluewscDataset,
path='json',
abbr='cluewsc-dev',
data_files='./data/FewCLUE/cluewsc/dev_few_all.json',
split='train',
reader_cfg=cluewsc_reader_cfg,
infer_cfg=cluewsc_infer_cfg,
eval_cfg=cluewsc_eval_cfg),
dict(
type=CluewscDataset,
path='json',
abbr='cluewsc-test',
data_files='./data/FewCLUE/cluewsc/test_public.json',
split='train',
reader_cfg=cluewsc_reader_cfg,
infer_cfg=cluewsc_infer_cfg,
eval_cfg=cluewsc_eval_cfg),
]

View File

@ -0,0 +1,48 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import TNewsDataset
tnews_reader_cfg = dict(input_columns='sentence', output_column='label_desc2')
tnews_labels = [
'农业新闻', '旅游新闻', '游戏新闻', '科技类别公司新闻', '体育类别新闻', '初升高教育新闻', '娱乐圈新闻', '投资资讯',
'军事类别常识', '车辆新闻', '楼市新闻', '环球不含中国类别新闻', '书籍文化历史类别新闻', '故事类别新闻', '股票市场类别新闻'
]
tnews_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
lb: dict(round=[
dict(role='HUMAN', prompt='{sentence}\n上述内容属于什么新闻?'),
dict(role='BOT', prompt=lb)
])
for lb in tnews_labels
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
tnews_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
tnews_datasets = [
dict(
type=TNewsDataset,
path='json',
abbr='tnews-dev',
data_files='./data/FewCLUE/tnews/dev_few_all.json',
split='train',
reader_cfg=tnews_reader_cfg,
infer_cfg=tnews_infer_cfg,
eval_cfg=tnews_eval_cfg),
dict(
type=TNewsDataset,
path='json',
abbr='tnews-test',
data_files='./data/FewCLUE/tnews/test_public.json',
split='train',
reader_cfg=tnews_reader_cfg,
infer_cfg=tnews_infer_cfg,
eval_cfg=tnews_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .GaokaoBench_gen_aed980 import GaokaoBench_datasets # noqa: F401, F403

View File

@ -0,0 +1,42 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import AXDataset_V2
AX_g_reader_cfg = dict(
input_columns=["hypothesis", "premise"],
output_column="label",
)
AX_g_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
"{premise}\n{hypothesis}\nIs the sentence below entailed by the sentence above?\nA. Yes\nB. No\nAnswer:"
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
AX_g_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
AX_g_datasets = [
dict(
abbr="AX_g",
type=AXDataset_V2,
path="./data/SuperGLUE/AX-g/AX-g.jsonl",
reader_cfg=AX_g_reader_cfg,
infer_cfg=AX_g_infer_cfg,
eval_cfg=AX_g_eval_cfg,
)
]

View File

@ -0,0 +1,53 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
AX_g_reader_cfg = dict(
input_columns=["hypothesis", "premise"],
output_column="label",
test_split="train")
AX_g_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
"entailment":
dict(round=[
dict(
role="HUMAN",
prompt=
"{premise}\n{hypothesis}\nIs the sentence below entailed by the sentence above?"
),
dict(role="BOT", prompt="Yes"),
]),
"not_entailment":
dict(round=[
dict(
role="HUMAN",
prompt=
"{premise}\n{hypothesis}\nIs the sentence below entailed by the sentence above?"
),
dict(role="BOT", prompt="No"),
])
},
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer),
)
AX_g_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
AX_g_datasets = [
dict(
type=HFDataset,
abbr="AX_g",
path="json",
data_files="./data/SuperGLUE/AX-g/AX-g.jsonl",
split="train",
reader_cfg=AX_g_reader_cfg,
infer_cfg=AX_g_infer_cfg,
eval_cfg=AX_g_eval_cfg,
)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .SuperGLUE_BoolQ_ppl_f80fb0 import BoolQ_datasets # noqa: F401, F403

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .SuperGLUE_CB_gen_bb97e1 import CB_datasets # noqa: F401, F403

View File

@ -0,0 +1,43 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import COPADataset_V2
COPA_reader_cfg = dict(
input_columns=["question", "premise", "choice1", "choice2"],
output_column="label",
)
COPA_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(
role="HUMAN",
prompt=
"{premise}\nQuestion: Which may be the {question}?\nA. {choice1}\nB. {choice2}\nAnswer:"
),
], ),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
COPA_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
COPA_datasets = [
dict(
abbr="COPA",
type=COPADataset_V2,
path="./data/SuperGLUE/COPA/val.jsonl",
reader_cfg=COPA_reader_cfg,
infer_cfg=COPA_infer_cfg,
eval_cfg=COPA_eval_cfg,
)
]

View File

@ -0,0 +1,42 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import MultiRCDataset_V2
MultiRC_reader_cfg = dict(
input_columns=["question", "text", "answer"],
output_column="label",
)
MultiRC_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
"{text}\nQuestion: {question}\nAnswer: {answer}\nIs it true?\nA. Yes\nB. No\nAnswer:"
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
MultiRC_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
MultiRC_datasets = [
dict(
abbr="MultiRC",
type=MultiRCDataset_V2,
path="./data/SuperGLUE/MultiRC/val.jsonl",
reader_cfg=MultiRC_reader_cfg,
infer_cfg=MultiRC_infer_cfg,
eval_cfg=MultiRC_eval_cfg,
)
]

View File

@ -0,0 +1,46 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import WiCDataset_V2
WiC_reader_cfg = dict(
input_columns=[
"word",
"sentence1",
"sentence2",
],
output_column="label",
)
WiC_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
"Sentence 1: {sentence1}\nSentence 2: {sentence2}\nAre '{word}' in the above two sentenses the same?\nA. Yes\nB. No\nAnswer:"
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
WiC_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
WiC_datasets = [
dict(
abbr="WiC",
type=WiCDataset_V2,
path="./data/SuperGLUE/WiC/val.jsonl",
reader_cfg=WiC_reader_cfg,
infer_cfg=WiC_infer_cfg,
eval_cfg=WiC_eval_cfg,
)
]

View File

@ -0,0 +1,55 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import WiCDataset
WiC_reader_cfg = dict(
input_columns=[
"word",
"sentence1",
"sentence2",
],
output_column="answer",
test_split="train")
WiC_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0:
dict(round=[
dict(
role="HUMAN",
prompt=
"Sentence 1: {sentence1}\nSentence 2: {sentence2}\n'{word}' in the above two sentenses are different."
),
]),
1:
dict(round=[
dict(
role="HUMAN",
prompt=
"Sentence 1: {sentence1}\nSentence 2: {sentence2}\n'{word}' in the above two sentenses are the same."
),
]),
},
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer),
)
WiC_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
WiC_datasets = [
dict(
type=WiCDataset,
abbr="WiC",
path="json",
data_files="./data/SuperGLUE/WiC/val.jsonl",
split="train",
reader_cfg=WiC_reader_cfg,
infer_cfg=WiC_infer_cfg,
eval_cfg=WiC_eval_cfg,
)
]

View File

@ -0,0 +1,49 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import WiCDataset
WiC_reader_cfg = dict(
input_columns=[
"word",
"sentence1",
"sentence2",
],
output_column="answer",
test_split="train")
WiC_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0:
dict(round=[
dict(
role="HUMAN",
prompt="{word} in {sentence1} and {sentence2} is different."),
]),
1:
dict(round=[
dict(role="HUMAN", prompt="{word} in {sentence1} and {sentence2} is same."),
]),
},
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer),
)
WiC_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
WiC_datasets = [
dict(
type=WiCDataset,
abbr="WiC",
path="json",
data_files="./data/SuperGLUE/WiC/val.jsonl",
split="train",
reader_cfg=WiC_reader_cfg,
infer_cfg=WiC_infer_cfg,
eval_cfg=WiC_eval_cfg,
)
]

View File

@ -0,0 +1,40 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import TheoremQADataset
TheoremQA_reader_cfg = dict(
input_columns=['Question', 'Answer_type'],
output_column='Answer',
train_split='test')
TheoremQA_prompt1 = "Please read a math problem, and then think step by step to derive the answer. The answer is decided by Answer Type. " \
"If the Answer type in [bool], the answer needs to be True or False. " \
"Else if the Answer type in [integer, float] , The answer needs to be in numerical form. " \
"Else if the Answer type in [list of integer, list of float] , the answer needs to be a list of number like [2, 3, 4]. " \
"Else if the Answer type in [option], the answer needs to be an option like (a), (b), (c), (d)." \
"You need to output the answer in your final sentence like 'Therefore, the answer is ...'."
TheoremQA_prompt2 = f"Below is an instruction that describes a task, paired with an input that provides further context. " \
f"Write a response that appropriately completes the request.\n\n### Instruction:\n{TheoremQA_prompt1}\n\n### Input:\n{{Question}}\nAnswer_type:{{Answer_type}}\n### Response:\n"
TheoremQA_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=TheoremQA_prompt2),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
TheoremQA_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='TheoremQA'))
TheoremQA_datasets = [
dict(
abbr='TheoremQA',
type=TheoremQADataset,
path="./data/TheoremQA/test.csv",
reader_cfg=TheoremQA_reader_cfg,
infer_cfg=TheoremQA_infer_cfg,
eval_cfg=TheoremQA_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .ceval_gen_ee2cb0 import ceval_datasets # noqa: F401, F403

View File

@ -0,0 +1,31 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import CLPInferencer
from opencompass.openicl.icl_evaluator import AUCROCEvaluator
from opencompass.datasets import CivilCommentsDataset
civilcomments_reader_cfg = dict(
input_columns=['text'],
output_column='label',
train_split='test',
test_split='test')
civilcomments_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template='Text: {text}\nQuestion: '
'Does the above text contain rude, hateful, aggressive, disrespectful '
'or unreasonable language?\nAnswer:'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=CLPInferencer))
civilcomments_eval_cfg = dict(evaluator=dict(type=AUCROCEvaluator), )
civilcomments_datasets = [
dict(
type=CivilCommentsDataset,
path='civil_comments',
reader_cfg=civilcomments_reader_cfg,
infer_cfg=civilcomments_infer_cfg,
eval_cfg=civilcomments_eval_cfg)
]

View File

@ -0,0 +1,57 @@
from mmengine.config import read_base
with read_base():
from ..mmlu.mmlu_gen_a568f1 import mmlu_datasets
from ..ceval.ceval_gen_ee2cb0 import ceval_datasets
from ..agieval.agieval_gen_dc7dae import agieval_datasets
from ..GaokaoBench.GaokaoBench_gen_aed980 import GaokaoBench_datasets
from ..bbh.bbh_gen_58abc3 import bbh_datasets
from ..humaneval.humaneval_gen_d428f1 import humaneval_datasets
from ..mbpp.mbpp_gen_4104e4 import mbpp_datasets
from ..CLUE_C3.CLUE_C3_gen_9e3de9 import C3_datasets
from ..CLUE_CMRC.CLUE_CMRC_gen_72a8d5 import CMRC_datasets
from ..CLUE_DRCD.CLUE_DRCD_gen_03b96b import DRCD_datasets
from ..CLUE_afqmc.CLUE_afqmc_gen_db509b import afqmc_datasets
from ..CLUE_cmnli.CLUE_cmnli_gen_316313 import cmnli_datasets
from ..CLUE_ocnli.CLUE_ocnli_gen_7c44b0 import ocnli_datasets
from ..FewCLUE_bustm.FewCLUE_bustm_gen_305431 import bustm_datasets
from ..FewCLUE_chid.FewCLUE_chid_gen_686c63 import chid_datasets
from ..FewCLUE_cluewsc.FewCLUE_cluewsc_gen_276956 import cluewsc_datasets
from ..FewCLUE_csl.FewCLUE_csl_gen_1b0c02 import csl_datasets
from ..FewCLUE_eprstmt.FewCLUE_eprstmt_gen_d6d06d import eprstmt_datasets
from ..FewCLUE_ocnli_fc.FewCLUE_ocnli_fc_gen_bef37f import ocnli_fc_datasets
from ..FewCLUE_tnews.FewCLUE_tnews_gen_8d59ba import tnews_datasets
from ..lcsts.lcsts_gen_427fde import lcsts_datasets
from ..lambada.lambada_gen_7ffe3d import lambada_datasets
from ..storycloze.storycloze_gen_c5a230 import storycloze_datasets
from ..SuperGLUE_AX_b.SuperGLUE_AX_b_gen_477186 import AX_b_datasets
from ..SuperGLUE_AX_g.SuperGLUE_AX_g_gen_7a5dee import AX_g_datasets
from ..SuperGLUE_BoolQ.SuperGLUE_BoolQ_gen_8525d1 import BoolQ_datasets
from ..SuperGLUE_CB.SuperGLUE_CB_gen_bb97e1 import CB_datasets
from ..SuperGLUE_COPA.SuperGLUE_COPA_gen_6d5e67 import COPA_datasets
from ..SuperGLUE_MultiRC.SuperGLUE_MultiRC_gen_26c9dc import MultiRC_datasets
from ..SuperGLUE_RTE.SuperGLUE_RTE_gen_ce346a import RTE_datasets
from ..SuperGLUE_ReCoRD.SuperGLUE_ReCoRD_gen_d8f19c import ReCoRD_datasets
from ..SuperGLUE_WiC.SuperGLUE_WiC_gen_c39367 import WiC_datasets
from ..SuperGLUE_WSC.SuperGLUE_WSC_gen_d8d441 import WSC_datasets
from ..race.race_gen_12de48 import race_datasets
from ..Xsum.Xsum_gen_d2126e import Xsum_datasets
from ..gsm8k.gsm8k_gen_2dd372 import gsm8k_datasets
from ..summedits.summedits_gen_4f35b5 import summedits_datasets
from ..math.math_gen_78bcba import math_datasets
from ..TheoremQA.TheoremQA_gen_891fcf import TheoremQA_datasets
from ..hellaswag.hellaswag_gen_cae9cb import hellaswag_datasets
from ..ARC_e.ARC_e_gen_0a29bf import ARC_e_datasets
from ..ARC_c.ARC_c_gen_3f3039 import ARC_c_datasets
from ..commonsenseqa.commonsenseqa_gen_a58dbd import commonsenseqa_datasets
from ..piqa.piqa_gen_8287ae import piqa_datasets
from ..siqa.siqa_gen_a3c714 import siqa_datasets
from ..strategyqa.strategyqa_gen_be3f8d import strategyqa_datasets
from ..winogrande.winogrande_gen_c19d87 import winogrande_datasets
from ..obqa.obqa_gen_b2cde9 import obqa_datasets
from ..nq.nq_gen_a6ffca import nq_datasets
from ..triviaqa.triviaqa_gen_cc3cbf import triviaqa_datasets
from ..flores.flores_gen_8eb9ca import flores_datasets
from ..crowspairs.crowspairs_gen_dd110a import crowspairs_datasets
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
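The final line gathers every imported `*_datasets` list into one flat `datasets` list. A small, self-contained illustration of that idiom (the two benchmark names are hypothetical):

```python
# sum(..., []) concatenates the per-benchmark lists into a single flat list.
nq_datasets = [dict(abbr='nq')]
race_datasets = [dict(abbr='race-middle'), dict(abbr='race-high')]

datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
print([d['abbr'] for d in datasets])  # ['nq', 'race-middle', 'race-high']
```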

View File

@ -0,0 +1,60 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import MDLRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import commonsenseqaDataset
commonsenseqa_reader_cfg = dict(
input_columns=["question", "A", "B", "C", "D", "E"],
output_column="answerKey",
test_split="validation")
_ice_template = dict(
type=PromptTemplate,
template=dict(
begin="</E>",
round=[
dict(
role="HUMAN",
prompt=
"{question}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nE. {E}\nAnswer:",
),
dict(
role="BOT",
prompt="{answerKey}",
),
],
),
ice_token="</E>",
)
commonsenseqa_infer_cfg = dict(
ice_template=_ice_template,
retriever=dict(
type=MDLRetriever,
ice_num=8,
candidate_num=30,
select_time=10,
seed=1,
batch_size=12,
ice_template=_ice_template,
),
inferencer=dict(type=GenInferencer),
)
commonsenseqa_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type="first-capital"),
)
commonsenseqa_datasets = [
dict(
type=commonsenseqaDataset,
path="commonsense_qa",
reader_cfg=commonsenseqa_reader_cfg,
infer_cfg=commonsenseqa_infer_cfg,
eval_cfg=commonsenseqa_eval_cfg,
)
]
del _ice_template

View File

@ -0,0 +1,40 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import C3Dataset
C3_reader_cfg = dict(
input_columns=[
'question', 'content', 'choice0', 'choice1', 'choice2', 'choice3',
'choices'
],
output_column='label')
C3_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0:
"阅读以下内容,选择合适的选项回答: {content} 问题:{question}\n 选项: -{choice0} -{choice1} -{choice2} -{choice3} 答: [MASK]-{choice0}",
1:
"阅读以下内容,选择合适的选项回答: {content} 问题:{question}\n 选项: -{choice0} -{choice1} -{choice2} -{choice3} 答: [MASK]-{choice1}",
2:
"阅读以下内容,选择合适的选项回答: {content} 问题:{question}\n 选项: -{choice0} -{choice1} -{choice2} -{choice3} 答: [MASK]-{choice2}",
3:
"阅读以下内容,选择合适的选项回答: {content} 问题:{question}\n 选项: -{choice0} -{choice1} -{choice2} -{choice3} 答: [MASK]-{choice3}",
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
C3_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
C3_datasets = [
dict(
type=C3Dataset,
abbr='C3',
path='./data/CLUE/C3/dev_0.json',
reader_cfg=C3_reader_cfg,
infer_cfg=C3_infer_cfg,
eval_cfg=C3_eval_cfg)
]

View File

@ -0,0 +1,41 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GLMChoiceInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import TNewsDataset
tnews_reader_cfg = dict(input_columns='sentence', output_column='label_desc2')
tnews_labels = [
'农业新闻', '旅游新闻', '游戏新闻', '科技类别公司新闻', '体育类别新闻', '初升高教育新闻', '娱乐圈新闻', '投资资讯',
'军事类别常识', '车辆新闻', '楼市新闻', '环球不含中国类别新闻', '书籍文化历史类别新闻', '故事类别新闻', '股票市场类别新闻'
]
tnews_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template={lb: f'</E></S>这篇新闻属于:{lb}'
for lb in tnews_labels},
column_token_map={'sentence': '</S>'},
ice_token='</E>'),
prompt_template=dict(
type=PromptTemplate,
template='</E></S>\n以上这篇新闻属于',
column_token_map={'sentence': '</S>'},
ice_token='</E>'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GLMChoiceInferencer, choices=tnews_labels))
tnews_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
tnews_datasets = [
dict(
type=TNewsDataset,
path='json',
abbr='tnews',
data_files='./data/FewCLUE/tnews/test_public.json',
split='train',
reader_cfg=tnews_reader_cfg,
infer_cfg=tnews_infer_cfg,
eval_cfg=tnews_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .govrepcrs_gen_455586 import govrepcrs_datasets # noqa: F401, F403

View File

@ -0,0 +1,47 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import BleuEvaluator
from opencompass.datasets import GovRepcrsDataset
govrepcrs_reader_cfg = dict(
input_columns='content',
output_column='summary',
train_split='test',
test_split='test')
govrepcrs_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin=[
dict(
role='SYSTEM',
fallback_role="HUMAN",
prompt=
'Please summarize the following English report in English:'
),
],
round=[
dict(role='HUMAN', prompt='{content}'),
dict(role='BOT', prompt='{summary}'),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(
type=GenInferencer, batch_size=4, max_out_len=500, max_seq_len=8192))
govrepcrs_eval_cfg = dict(
evaluator=dict(type=BleuEvaluator),
pred_role='BOT',
pred_postprocessor=dict(type='general_cn'),
dataset_postprocessor=dict(type='general_cn'))
govrepcrs_datasets = [
dict(
type=GovRepcrsDataset,
path='./data/govrep/',
abbr='GovRepcrs',
reader_cfg=govrepcrs_reader_cfg,
infer_cfg=govrepcrs_infer_cfg,
eval_cfg=govrepcrs_eval_cfg)
]

View File

@ -0,0 +1,43 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import hellaswagDataset_V2
hellaswag_reader_cfg = dict(
input_columns=["ctx", "A", "B", "C", "D"],
output_column="label",
test_split="validation")
hellaswag_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=(
"{ctx}\nQuestion: Which ending makes the most sense?\n"
"A. {A}\nB. {B}\nC. {C}\nD. {D}\n"
"You may choose from 'A', 'B', 'C', 'D'.\n"
"Answer:"),
),
]),
),
retriever=dict(type=ZeroRetriever, ),
inferencer=dict(type=GenInferencer),
)
hellaswag_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
hellaswag_datasets = [
dict(
type=hellaswagDataset_V2,
path="hellaswag",
reader_cfg=hellaswag_reader_cfg,
infer_cfg=hellaswag_infer_cfg,
eval_cfg=hellaswag_eval_cfg)
]

View File

@ -0,0 +1,38 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import BM25Retriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import BleuEvaluator
from opencompass.datasets import IWSLT2017Dataset
iwslt2017_reader_cfg = dict(
input_columns='en', output_column='de', train_split='validation')
iwslt2017_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(
begin='</E>',
round=[
dict(role='HUMAN', prompt='Please translate the following English statements to German:\n{en}'),
dict(role='BOT', prompt='{de}'),
]
),
ice_token='</E>'),
retriever=dict(type=BM25Retriever, ice_num=1),
inferencer=dict(type=GenInferencer))
iwslt2017_eval_cfg = dict(
evaluator=dict(type=BleuEvaluator),
pred_role='BOT',
pred_postprocessor=dict(type='general_cn'),
dataset_postprocessor=dict(type='general_cn'))
iwslt2017_datasets = [
dict(
type=IWSLT2017Dataset,
path='iwslt2017',
name='iwslt2017-en-de',
reader_cfg=iwslt2017_reader_cfg,
infer_cfg=iwslt2017_infer_cfg,
eval_cfg=iwslt2017_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .lcsts_gen_427fde import lcsts_datasets # noqa: F401, F403

View File

@ -0,0 +1,64 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import MBPPDataset, MBPPEvaluator
mbpp_reader_cfg = dict(
input_columns=['text', 'test_list'], output_column='code')
mbpp_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(
role="HUMAN",
prompt=
"You are an expert Python programmer, and here is your task: Write a function to find the similar elements from the given two tuple lists. Your code should pass these tests:\n\n assert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5)\n assert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4) \n assert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14) \n"
),
dict(
role="BOT",
prompt=
"[BEGIN]\n 'def similar_elements(test_tup1, test_tup2):\r\n res = tuple(set(test_tup1) & set(test_tup2))\r\n return (res)' \n[DONE] \n\n "
),
dict(
role="HUMAN",
prompt=
"You are an expert Python programmer, and here is your task: Write a python function to identify non-prime numbers. Your code should pass these tests:\n\n assert is_not_prime(2) == False \n assert is_not_prime(10) == True \n assert is_not_prime(35) == True \n"
),
dict(
role="BOT",
prompt=
"[BEGIN]\n 'import math\r\ndef is_not_prime(n):\r\n result = False\r\n for i in range(2,int(math.sqrt(n)) + 1):\r\n if n % i == 0:\r\n result = True\r\n return result' \n[DONE] \n\n "
),
dict(
role="HUMAN",
prompt=
"You are an expert Python programmer, and here is your task: Write a function to find the largest integers from a given list of numbers using heap queue algorithm. Your code should pass these tests:\n\n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35] \n"
),
dict(
role="BOT",
prompt=
"[BEGIN]\n 'import heapq as hq\r\ndef heap_queue_largest(nums,n):\r\n largest_nums = hq.nlargest(n, nums)\r\n return largest_nums' \n[DONE] \n\n "
),
dict(
role="HUMAN",
prompt=
"You are an expert Python programmer, and here is your task: {text} Your code should pass these tests:\n\n {test_list} \n"
),
dict(role="BOT", prompt="[BEGIN]\n"),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
mbpp_eval_cfg = dict(evaluator=dict(type=MBPPEvaluator), pred_role="BOT")
mbpp_datasets = [
dict(
type=MBPPDataset,
abbr='mbpp',
path='./data/mbpp/mbpp.jsonl',
reader_cfg=mbpp_reader_cfg,
infer_cfg=mbpp_infer_cfg,
eval_cfg=mbpp_eval_cfg)
]

View File

@ -0,0 +1,30 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import NarrativeQADataset, TriviaQAEvaluator
narrativeqa_reader_cfg = dict(
input_columns=['question', 'evidence'],
output_column='answer',
train_split='valid',
test_split='valid')
narrativeqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template="{evidence}\nAnswer these questions:\nQ: {question}?\nA:"),
retriever=dict(type=ZeroRetriever),
inferencer=dict(
type=GenInferencer, max_out_len=50, max_seq_len=8192, batch_size=4))
narrativeqa_eval_cfg = dict(evaluator=dict(type=TriviaQAEvaluator))
narrativeqa_datasets = [
dict(
type=NarrativeQADataset,
abbr='NarrativeQA',
path='./data/narrativeqa/',
reader_cfg=narrativeqa_reader_cfg,
infer_cfg=narrativeqa_infer_cfg,
eval_cfg=narrativeqa_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .obqa_ppl_2b5b12 import obqa_datasets # noqa: F401, F403

View File

@ -0,0 +1,66 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import OBQADataset
_input_columns = [
['question_stem', 'A', 'B', 'C', 'D'],
['question_stem', 'A', 'B', 'C', 'D', 'fact1'],
]
_template = [
{
ans: dict(
round=[
dict(
role="HUMAN",
prompt=
"Question: {question_stem}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer:"
),
dict(role="BOT", prompt=ans),
], )
for ans in ['A', 'B', 'C', 'D']
},
{
ans: dict(
round=[
dict(
role="HUMAN",
prompt=
"Given the fact: {fact1}\nQuestion: {question_stem}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer:"
),
dict(role="BOT", prompt=ans),
], )
for ans in ['A', 'B', 'C', 'D']
}
]
obqa_datasets = [
dict(
type=OBQADataset,
path='openbookqa',
split='test',
),
dict(
abbr='openbookqa_fact',
type=OBQADataset,
path='openbookqa',
name='additional',
split='test',
),
]
for _i in range(2):
obqa_reader_cfg = dict(
input_columns=_input_columns[_i], output_column="answerKey")
obqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=_template[_i]),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer),
)
obqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )
obqa_datasets[_i]["reader_cfg"] = obqa_reader_cfg
obqa_datasets[_i]["infer_cfg"] = obqa_infer_cfg
obqa_datasets[_i]["eval_cfg"] = obqa_eval_cfg

View File

@ -0,0 +1,52 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import OBQADataset
_input_columns = [
['question_stem', 'A', 'B', 'C', 'D'],
['question_stem', 'A', 'B', 'C', 'D', 'fact1'],
]
_template = [{
'A': "{question_stem} {A}",
'B': "{question_stem} {B}",
'C': "{question_stem} {C}",
'D': "{question_stem} {D}",
}, {
'A': "Given the fact {fact1}, we know that {question_stem} {A}",
'B': "Given the fact {fact1}, we know that {question_stem} {B}",
'C': "Given the fact {fact1}, we know that {question_stem} {C}",
'D': "Given the fact {fact1}, we know that {question_stem} {D}",
}]
obqa_datasets = [
dict(
abbr="openbookqa",
type=OBQADataset,
path="openbookqa",
split="test",
),
dict(
abbr="openbookqa_fact",
type=OBQADataset,
path="openbookqa",
name="additional",
split="test",
),
]
for _i in range(2):
obqa_reader_cfg = dict(
input_columns=_input_columns[_i], output_column="answerKey")
obqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=_template[_i]),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer),
)
obqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
obqa_datasets[_i]["reader_cfg"] = obqa_reader_cfg
obqa_datasets[_i]["infer_cfg"] = obqa_infer_cfg
obqa_datasets[_i]["eval_cfg"] = obqa_eval_cfg

View File

@ -0,0 +1,36 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import ToxicEvaluator
from opencompass.datasets import RealToxicPromptsDataset
realtoxicprompts_reader_cfg = dict(
input_columns=['prompt_text'],
output_column='filename',
train_split='train',
test_split='train')
# TODO: allow empty output-column
realtoxicprompts_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[dict(role="HUMAN", prompt="{prompt_text}")])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
# When key is set to "ENV", the key will be fetched from the environment
# variable $PerspectiveAPIkey. Otherwise, set the key here directly.
realtoxicprompts_eval_cfg = dict(
evaluator=dict(type=ToxicEvaluator, key='ENV'),
pred_role='BOT',
)
realtoxicprompts_datasets = [
dict(
type=RealToxicPromptsDataset,
path='allenai/real-toxicity-prompts',
challenging_subset=True,
reader_cfg=realtoxicprompts_reader_cfg,
infer_cfg=realtoxicprompts_infer_cfg,
eval_cfg=realtoxicprompts_eval_cfg)
]
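Following the comment above, with `key='ENV'` the Perspective API key is read from the `PerspectiveAPIkey` environment variable. A minimal sketch of setting it before an evaluation run (the placeholder value is, of course, not a real key):

```python
import os

# Assumes ToxicEvaluator(key='ENV') looks up this variable, as the comment above states.
os.environ['PerspectiveAPIkey'] = '<your-perspective-api-key>'
```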

View File

@ -0,0 +1,39 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import storyclozeDataset
storycloze_reader_cfg = dict(
input_columns=['context', 'sentence_quiz1', 'sentence_quiz2'],
output_column='answer_right_ending',
train_split='test',
test_split='test')
storycloze_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
i: dict(round=[
dict(role="HUMAN", prompt="{context}"),
dict(role="BOT", prompt=f"{{sentence_quiz{i}}}"),
])
for i in range(1, 3)
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
storycloze_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
# The original Story Cloze dataset and repo are no longer maintained.
# Use the multilingual version of this dataset instead.
storycloze_datasets = [
dict(
abbr='story_cloze',
type=storyclozeDataset,
path='juletxara/xstory_cloze',
name='en',
reader_cfg=storycloze_reader_cfg,
infer_cfg=storycloze_infer_cfg,
eval_cfg=storycloze_eval_cfg)
]

View File

@ -0,0 +1,50 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
summedits_reader_cfg = dict(
input_columns=['doc', 'summary'],
output_column='label',
test_split='train')
summedits_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0:
dict(round=[
dict(
role="HUMAN",
prompt=
"""\nDocument:\n{doc}\nSummary:\n{summary}\nIs the summary factually consistent with the document? """
),
dict(role="BOT", prompt="No")
]),
1:
dict(round=[
dict(
role="HUMAN",
prompt=
"""Document:\n{doc}\nSummary:\n{summary}\nIs the summary factually consistent with the document? """
),
dict(role="BOT", prompt="Yes")
]),
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
summedits_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
summedits_datasets = [
dict(
type=HFDataset,
abbr='summedits',
path='json',
split='train',
data_files='./data/summedits/summedits.jsonl',
reader_cfg=summedits_reader_cfg,
infer_cfg=summedits_infer_cfg,
eval_cfg=summedits_eval_cfg)
]

View File

@ -0,0 +1,35 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import BleuEvaluator
from opencompass.datasets import SummScreenDataset
summscreen_reader_cfg = dict(
input_columns='content',
output_column='summary',
train_split='dev',
test_split='dev')
summscreen_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=
"Please summarize the following English report in English:{content}\n{summary}."),
retriever=dict(type=ZeroRetriever),
inferencer=dict(
type=GenInferencer, batch_size=4, max_out_len=500, max_seq_len=8192))
summscreen_eval_cfg = dict(
evaluator=dict(type=BleuEvaluator),
pred_postprocessor=dict(type='general_cn'),
dataset_postprocessor=dict(type='general_cn'))
summscreen_datasets = [
dict(
type=SummScreenDataset,
path='./data/SummScreen/',
abbr='SummScreen',
reader_cfg=summscreen_reader_cfg,
infer_cfg=summscreen_infer_cfg,
eval_cfg=summscreen_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .winograd_ppl_c1c427 import winograd_datasets # noqa: F401, F403

View File

@ -0,0 +1,28 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset
z_bench_reader_cfg = dict(
ds_size=4,
input_columns=['text'],
output_column='category',
train_split='test')
z_bench_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[dict(role="HUMAN", prompt="{text}")]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
z_bench_dataset = dict(
type=HFDataset,
path=
'/mnt/petrelfs/gaotong/llm_eval/openagieval_dataset/eval_datasets/z_bench',
data_dir=
'/mnt/petrelfs/gaotong/llm_eval/openagieval_dataset/eval_datasets/z_bench',
name='question',
reader_cfg=z_bench_reader_cfg,
infer_cfg=z_bench_infer_cfg)

20
docs/en/Makefile Normal file
View File

@ -0,0 +1,20 @@
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = .
BUILDDIR = _build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

View File

@ -0,0 +1 @@
# Prompt Template

View File

@ -0,0 +1 @@
# Prepare Models

View File

@ -0,0 +1,62 @@
.header-logo {
background-image: url("../image/logo.png");
background-size: 183px 50px;
height: 50px;
width: 183px;
}
@media screen and (min-width: 1100px) {
.header-logo {
top: -12px;
}
}
pre {
white-space: pre;
}
@media screen and (min-width: 2000px) {
.pytorch-content-left {
width: 1200px;
margin-left: 30px;
}
article.pytorch-article {
max-width: 1200px;
}
.pytorch-breadcrumbs-wrapper {
width: 1200px;
}
.pytorch-right-menu.scrolling-fixed {
position: fixed;
top: 45px;
left: 1580px;
}
}
article.pytorch-article section code {
padding: .2em .4em;
background-color: #f3f4f7;
border-radius: 5px;
}
/* Disable the change in tables */
article.pytorch-article section table code {
padding: unset;
background-color: unset;
border-radius: unset;
}
table.autosummary td {
width: 50%
}
img.align-center {
display: block;
margin-left: auto;
margin-right: auto;
}
article.pytorch-article p.rubric {
font-weight: bold;
}

Binary file not shown. (new image, 12 KiB)

View File

@ -0,0 +1,14 @@
.. role:: hidden
:class: hidden-section
.. currentmodule:: {{ module }}
{{ name | underline}}
.. autoclass:: {{ name }}
:members:
:special-members: __call__
..
autogenerated from _templates/callable.rst
note it does not have :inherited-members:

View File

@ -0,0 +1,67 @@
# Contributing to OpenCompass

- [Contributing to OpenCompass](#contributing-to-opencompass)
  - [Workflow](#workflow)
  - [Code Style](#code-style)
    - [Python](#python)
  - [Pre-commit Hook](#pre-commit-hook)

Thanks for your contribution to OpenCompass. We welcome all kinds of contributions, including but not limited to the following.

- Fix typos or bugs
- Add documentation or translate it into other languages
- Add new features and components

## Workflow

We recommend that potential contributors follow this workflow.

1. Fork and pull the latest OpenCompass repository, then follow [Get Started](https://OpenCompass.readthedocs.io/en/latest/get_started.html) to set up the environment.
2. Check out a new branch (**do not use the master or dev branch to create a PR**):

```bash
git checkout -b xxxx # xxxx is the name of the new branch
```

3. Edit the relevant files and follow the code style described below.
4. Use the [pre-commit hook](https://pre-commit.com/) to check and format your changes.
5. Commit your changes.
6. Create a PR with the relevant information.

## Code Style

### Python

We adopt [PEP8](https://www.python.org/dev/peps/pep-0008/) as the preferred code style.

We use the following tools for linting and formatting:

- [flake8](https://github.com/PyCQA/flake8): a wrapper around several linter tools.
- [isort](https://github.com/timothycrosley/isort): a utility to sort Python imports.
- [yapf](https://github.com/google/yapf): a formatter for Python files.
- [codespell](https://github.com/codespell-project/codespell): a Python utility to fix common misspellings in text files.
- [mdformat](https://github.com/executablebooks/mdformat): an opinionated Markdown formatter that can be used to enforce a consistent style in Markdown files.
- [docformatter](https://github.com/myint/docformatter): a tool to format docstrings.

The style configurations of yapf and isort can be found in [setup.cfg](https://github.com/OpenCompass/blob/main/setup.cfg).

## Pre-commit Hook

We use the [pre-commit hook](https://pre-commit.com/) to automatically check and format `flake8`, `yapf`, `isort`, `trailing whitespaces` and `markdown files` on every commit, fix `end-of-files`, `double-quoted-strings`, `python-encoding-pragma` and `mixed-line-ending`, and automatically sort `requirements.txt`. The configuration of the pre-commit hook is stored in [.pre-commit-config]().

After you clone the repository, you need to install and initialize the pre-commit hook.

```shell
pip install -U pre-commit
```

Run from the repository folder:

```shell
pre-commit install
```

After this, the code linters and formatters will be enforced on every commit.

> Before you create a PR, make sure your code passes the lint checks and is formatted by yapf.

View File

@ -0,0 +1,45 @@
import json
from datasets import Dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class ARCDataset(BaseDataset):
@staticmethod
def load(path: str):
with open(path, 'r', errors='ignore') as in_f:
rows = []
for i, line in enumerate(in_f):
sample = json.loads(line.strip())
answerKey = sample['answerKey']
sample = sample['question']
question = sample['stem']
choices = sample['choices']
if len(choices) != 4:
continue
textA = choices[0]['text']
textB = choices[1]['text']
textC = choices[2]['text']
textD = choices[3]['text']
rows.append({
'question': question,
'answerKey': answerKey,
'textA': textA,
'textB': textB,
'textC': textC,
'textD': textD
})
dataset = Dataset.from_dict({
'question': [row['question'] for row in rows],
'answerKey': [row['answerKey'] for row in rows],
'textA': [row['textA'] for row in rows],
'textB': [row['textB'] for row in rows],
'textC': [row['textC'] for row in rows],
'textD': [row['textD'] for row in rows]
})
return dataset
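As a reference for the parsing logic above, here is a sketch of one JSON line the loader expects; the field values are illustrative, and only questions with exactly four choices are kept:

```python
import json

line = json.dumps({
    'answerKey': 'B',
    'question': {
        'stem': 'Which factor will most likely cause a person to develop a fever?',
        'choices': [
            {'text': 'a leg muscle relaxing after exercise'},
            {'text': 'a bacterial population in the bloodstream'},
            {'text': 'several viral particles on the skin'},
            {'text': 'carbohydrates being digested in the stomach'},
        ],
    },
})
sample = json.loads(line)
print(sample['question']['stem'], '->', sample['answerKey'])
```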

View File

@ -0,0 +1,36 @@
import re
from datasets import DatasetDict, load_dataset
from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS
from .base import BaseDataset
@LOAD_DATASET.register_module()
class FloresFirst100Dataset(BaseDataset):
@staticmethod
def load(name):
return DatasetDict({
'dev':
load_dataset(path='facebook/flores', name=name, split='dev'),
'devtest':
load_dataset(
path='facebook/flores', name=name, split='devtest[:100]')
})
@TEXT_POSTPROCESSORS.register_module('flores')
def flores_postprocess(text: str) -> str:
text = text.strip().split('\n')[0]
return text
@TEXT_POSTPROCESSORS.register_module('flores-chinese')
def flores_postprocess_chinese(text: str) -> str:
import jieba
truncated_text = text.strip().split('\n')[0]
cleaned_text = re.sub(r'\s+', ' ', truncated_text).strip()
cleaned_text = ' '.join(jieba.cut(cleaned_text))
return cleaned_text
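A quick, standalone illustration of what `flores_postprocess_chinese` does to a model output (requires `jieba`; the exact segmentation depends on its dictionary):

```python
import re

import jieba

text = '今天天气很好。\n(后续生成内容被丢弃)'
truncated = text.strip().split('\n')[0]           # keep only the first line
cleaned = re.sub(r'\s+', ' ', truncated).strip()  # collapse whitespace
print(' '.join(jieba.cut(cleaned)))               # e.g. '今天 天气 很 好 。'
```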

View File

@ -0,0 +1,43 @@
from datasets import Dataset, DatasetDict
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class QASPERDataset(BaseDataset):
@staticmethod
def load(path: str):
import json
import os
dataset_dict = DatasetDict()
split = 'dev'
dev_list = []
dev = os.path.join(path, 'qasper-dev-v0.3.json')
with open(dev, 'r') as f:
dev_json = json.load(f)
for article_id in dev_json.keys():
full_article = '\n'.join([
(x['section_name'] if x['section_name'] else '') + '\n' +
'\n'.join(x['paragraphs']) + '\n'
for x in dev_json[article_id]['full_text']
])
for qa in dev_json[article_id]['qas']:
question = qa['question']
answers = []
for x in qa['answers']:
answers.extend(x['answer']['extractive_spans'])
if answers:
dev_list.append({
'answer': answers,
'question': question,
'evidence': full_article,
})
else:
continue
dataset_dict[split] = Dataset.from_list(dev_list)
return dataset_dict

View File

@ -0,0 +1,53 @@
from datasets import Dataset, DatasetDict
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class QASPERCUTDataset(BaseDataset):
@staticmethod
def load(path: str):
import json
import os
dataset_dict = DatasetDict()
split = 'dev'
dev_list = []
dev = os.path.join(path, 'qasper-dev-v0.3.json')
with open(dev, 'r') as f:
dev_json = json.load(f)
for article_id in dev_json.keys():
full_article = '\n'.join([
(x['section_name'] if x['section_name'] else '') + '\n' +
'\n'.join(x['paragraphs']) + '\n'
for x in dev_json[article_id]['full_text']
])
for qa in dev_json[article_id]['qas']:
question = qa['question']
answers = []
clues = []
for x in qa['answers']:
answers.extend(x['answer']['extractive_spans'])
clues.extend(x['answer']['evidence'])
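# Truncate the article at the earliest evidence span: the large sentinel keeps
# min() defined when no clue is given, and either -1 (clue not found) or the
# sentinel falls back to keeping the article from the beginning.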
evis = [full_article.find(clue)
for clue in clues] + [100000000]
evi = min(evis)
if evi == -1 or evi == 100000000:
evi = 0
if answers:
dev_list.append({
'answer': answers,
'question': question,
'evidence': full_article[evi:],
})
else:
continue
dataset_dict[split] = Dataset.from_list(dev_list)
return dataset_dict

View File

@ -0,0 +1,23 @@
from datasets import Dataset, DatasetDict
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class SafetyDataset(BaseDataset):
@staticmethod
def load(path):
dataset = DatasetDict()
data_list = list()
idx = 0
with open(path, 'r') as f:
for line in f:
if line.strip():
data_list.append({'idx': idx, 'prompt': line.strip()})
idx += 1
dataset['test'] = Dataset.from_list(data_list)
return dataset

View File

@ -0,0 +1,58 @@
from datasets import Dataset, DatasetDict
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class TriviaQArcDataset(BaseDataset):
@staticmethod
def load(path: str):
import json
import os
dataset_dict = DatasetDict()
split = 'dev'
dev_list = []
web_dev = os.path.join(path, 'qa', 'verified-web-dev.json')
with open(web_dev, 'r') as f:
web_dev_json = json.load(f)
for x in web_dev_json['Data']:
cand_answers = x['Answer']['Aliases'] + x['Answer']['HumanAnswers']
question = x['Question']
evidence = ''
if x['SearchResults']:
x_path = os.path.join(path, 'evidence', 'web',
x['SearchResults'][0]['Filename'])
with open(x_path, 'r') as f:
evidence = f.read(100000)
dev_list.append({
'answer': cand_answers,
'question': question,
'evidence': evidence,
})
wiki_dev = os.path.join(path, 'qa', 'verified-wikipedia-dev.json')
with open(wiki_dev, 'r') as f:
wiki_dev_json = json.load(f)
for x in wiki_dev_json['Data']:
cand_answers = x['Answer']['Aliases']
question = x['Question']
evidence = ''
if x['EntityPages']:
x_path = os.path.join(path, 'evidence', 'wikipedia',
x['EntityPages'][0]['Filename'])
with open(x_path, 'r') as f:
evidence = f.read(100000)
dev_list.append({
'answer': cand_answers,
'question': question,
'evidence': evidence,
})
dataset_dict[split] = Dataset.from_list(dev_list)
return dataset_dict

View File

@ -0,0 +1,44 @@
from datasets import load_dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class winograndeDataset(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
def preprocess(example):
prompt = example.pop('sentence')
example['opt1'] = prompt.replace('_', example.pop('option1'))
example['opt2'] = prompt.replace('_', example.pop('option2'))
return example
return dataset.map(preprocess)
@LOAD_DATASET.register_module()
class winograndeDataset_V2(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
def preprocess(example):
prompt = example.pop('sentence')
example['opt1'] = prompt.replace('_', example.pop('option1'))
example['opt2'] = prompt.replace('_', example.pop('option2'))
answer = example.pop('answer')
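# An empty answer marks an unlabeled example; otherwise the 1-indexed answer
# is mapped to a letter via ' AB': ' AB'[1] == 'A', ' AB'[2] == 'B'.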
if answer == '':
example['label'] = 'NULL'
else:
example['label'] = ' AB'[int(answer)]
return example
return dataset.map(preprocess)

View File

@ -0,0 +1,29 @@
from datasets import concatenate_datasets, load_dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class XCOPADataset(BaseDataset):
@staticmethod
def load(**kwargs):
path = kwargs.get('path', None)
lans = [
'et', 'ht', 'it', 'id', 'qu', 'sw', 'zh', 'ta', 'th', 'tr', 'vi',
'translation-et', 'translation-ht', 'translation-it',
'translation-id', 'translation-sw', 'translation-zh',
'translation-ta', 'translation-th', 'translation-tr',
'translation-vi'
]
datasets = []
for lan in lans:
dataset = load_dataset(path, lan)['validation']
datasets.append(dataset)
combined_dataset = concatenate_datasets(datasets)
return combined_dataset

View File

@ -0,0 +1,41 @@
from typing import List
import numpy as np
from sklearn.metrics import roc_auc_score
from opencompass.registry import ICL_EVALUATORS
from .icl_base_evaluator import BaseEvaluator
@ICL_EVALUATORS.register_module()
class AUCROCEvaluator(BaseEvaluator):
"""Calculate AUC-ROC scores and accuracy according the prediction.
For some dataset, the accuracy cannot reveal the difference between
models because of the saturation. AUC-ROC scores can further exam
model abilities to distinguish different labels. More details can refer to
https://scikit-learn.org/stable/modules/generated/sklearn.metrics.roc_auc_score.html
""" # noqa
def __init__(self) -> None:
super().__init__()
def score(self, predictions: List, references: List) -> dict:
"""Calculate scores and accuracy.
Args:
predictions (List): List of probabilities for each class of each
sample.
references (List): List of target labels for each sample.
Returns:
dict: calculated scores.
"""
if len(predictions) != len(references):
return {
'error': 'predictions and references have different length.'
}
auc_score = roc_auc_score(references, np.array(predictions)[:, 1])
accuracy = sum(
references == np.argmax(predictions, axis=1)) / len(references)
return dict(auc_score=auc_score * 100, accuracy=accuracy * 100)
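A minimal usage sketch with toy data (per-sample class probabilities; the expected output below is computed by hand and rounded):

```python
evaluator = AUCROCEvaluator()
predictions = [[0.9, 0.1], [0.2, 0.8], [0.4, 0.6]]  # [P(class 0), P(class 1)] per sample
references = [0, 1, 0]
print(evaluator.score(predictions, references))
# -> approximately {'auc_score': 100.0, 'accuracy': 66.67}
```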

View File

@ -0,0 +1,4 @@
from .icl_base_inferencer import BaseInferencer
from .icl_gen_inferencer import GenInferencer
from .icl_ppl_inferencer import PPLInferencer
from .icl_clp_inferencer import CLPInferencer

View File

@ -0,0 +1,74 @@
"""BM25 Retriever."""
from typing import List, Optional
import numpy as np
from nltk.tokenize import word_tokenize
from rank_bm25 import BM25Okapi
from tqdm import trange
from opencompass.openicl.icl_retriever import BaseRetriever
from opencompass.openicl.utils.logging import get_logger
from opencompass.registry import ICL_RETRIEVERS
logger = get_logger(__name__)
@ICL_RETRIEVERS.register_module()
class BM25Retriever(BaseRetriever):
"""BM25 Retriever. In information retrieval, Okapi BM25 (BM is an
abbreviation of best matching) is a ranking function used by search engines
to estimate the relevance of documents to a given search query. You can
find more details in https://en.wikipedia.org/wiki/Okapi_BM25. Each in-
context example of the test prompts is retrieved by the BM25 Algorithm.
Args:
dataset (`BaseDataset`): Any BaseDataset instances.
Attributes of ``reader``, ``train`` and ``test`` will be used.
ice_separator (`Optional[str]`): The separator between each in-context
example template when origin `PromptTemplate` is provided. Defaults
to '\n'.
ice_eos_token (`Optional[str]`): The end of sentence token for
in-context example template when origin `PromptTemplate` is
provided. Defaults to '\n'.
ice_num (`Optional[int]`): The number of in-context example template
when origin `PromptTemplate` is provided. Defaults to 1.
index_split (`Optional[str]`): The split of the dataset to retrieve the
in-context example index, used when `dataset_reader.dataset` is an
instance of `datasets.Dataset`. Defaults to 'train'.
test_split (`Optional[str]`): The split of the dataset to retrieve the
in-context example, used when `dataset_reader.dataset` is an
instance of `datasets.Dataset`. Defaults to 'test'.
"""
bm25 = None
index_corpus = None
test_corpus = None
def __init__(self,
dataset,
ice_separator: Optional[str] = '\n',
ice_eos_token: Optional[str] = '\n',
ice_num: Optional[int] = 1) -> None:
super().__init__(dataset, ice_separator, ice_eos_token, ice_num)
self.index_corpus = [
word_tokenize(data) for data in
self.dataset_reader.generate_input_field_corpus(self.index_ds)
]
self.bm25 = BM25Okapi(self.index_corpus)
self.test_corpus = [
word_tokenize(data) for data in
self.dataset_reader.generate_input_field_corpus(self.test_ds)
]
def retrieve(self) -> List[List]:
"""Retrieve the in-context example index for each test example."""
rtr_idx_list = []
logger.info('Retrieving data for test set...')
for idx in trange(len(self.test_corpus),
disable=not self.is_main_process):
query = self.test_corpus[idx]
scores = self.bm25.get_scores(query)
near_ids = list(np.argsort(scores)[::-1][:self.ice_num])
near_ids = [int(a) for a in near_ids]
rtr_idx_list.append(near_ids)
return rtr_idx_list
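For intuition, here is a standalone sketch of the underlying retrieval step using `rank_bm25` directly, outside the OpenCompass wrapper (tokenization simplified to `str.split` instead of the `word_tokenize` used above):

```python
import numpy as np
from rank_bm25 import BM25Okapi

pool = ['the cat sat on the mat',
        'dogs chase cats in the yard',
        'stock prices fell sharply today']
bm25 = BM25Okapi([doc.split() for doc in pool])          # tokenized in-context pool
scores = bm25.get_scores('a cat on a mat'.split())       # BM25 score per pool entry
print(int(np.argsort(scores)[::-1][0]))                  # index of the best match, 0 here
```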

View File

@ -0,0 +1,40 @@
"""Random Retriever."""
from typing import Optional
import numpy as np
from tqdm import trange
from opencompass.openicl.icl_retriever import BaseRetriever
from opencompass.openicl.utils.logging import get_logger
logger = get_logger(__name__)
class RandomRetriever(BaseRetriever):
"""Random Retriever. Each in-context example of the test prompts is
retrieved in a random way.
**WARNING**: This class has not been tested thoroughly. Please use it with
caution.
"""
def __init__(self,
dataset,
ice_separator: Optional[str] = '\n',
ice_eos_token: Optional[str] = '\n',
ice_num: Optional[int] = 1,
seed: Optional[int] = 43) -> None:
super().__init__(dataset, ice_separator, ice_eos_token, ice_num)
self.seed = seed
def retrieve(self):
np.random.seed(self.seed)
num_idx = len(self.index_ds)
rtr_idx_list = []
logger.info('Retrieving data for test set...')
for _ in trange(len(self.test_ds), disable=not self.is_main_process):
idx_list = np.random.choice(num_idx, self.ice_num,
replace=False).tolist()
rtr_idx_list.append(idx_list)
return rtr_idx_list

View File

@ -0,0 +1,26 @@
"""Zeroshot Retriever."""
from typing import List, Optional
from opencompass.openicl.icl_retriever import BaseRetriever
from opencompass.registry import ICL_RETRIEVERS
@ICL_RETRIEVERS.register_module()
class ZeroRetriever(BaseRetriever):
"""Zeroshot Retriever. The retriever returns empty list for all queries.
Args:
dataset (`BaseDataset`): Any BaseDataset instances.
Attributes of ``reader``, ``train`` and ``test`` will be used.
ice_eos_token (`Optional[str]`): The end of sentence token for
in-context example template when origin `PromptTemplate` is
provided. Defaults to ''.
"""
def __init__(self, dataset, ice_eos_token: Optional[str] = '') -> None:
super().__init__(dataset, '', ice_eos_token, 0)
def retrieve(self) -> List[List]:
rtr_idx_list = [[] for _ in range(len(self.test_ds))]
return rtr_idx_list

View File

@ -0,0 +1 @@
from .logging import *

View File

@ -0,0 +1,13 @@
from mmengine.logging import MMLogger
def get_logger(log_level='INFO') -> MMLogger:
"""Get the logger for OpenCompass.
Args:
log_level (str): The log level. Default: 'INFO'. Choices are 'DEBUG',
'INFO', 'WARNING', 'ERROR', 'CRITICAL'.
"""
return MMLogger.get_instance('OpenCompass',
logger_name='OpenCompass',
log_level=log_level)
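A brief usage sketch (MMLogger follows the standard `logging` interface):

```python
logger = get_logger(log_level='DEBUG')
logger.info('OpenCompass logger is ready')
```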