initial commit

gaotongxiao 2023-07-04 21:34:55 +08:00
commit 7d346000bb
188 changed files with 17029 additions and 0 deletions

README_zh-CN.md (66 lines added) · Normal file
View File

@ -0,0 +1,66 @@
<div align="center">
<img src="https://user-images.githubusercontent.com/22607038/250798681-b52045d2-cedd-4070-84e2-410903ac404f.png" width="500px"/>
[![docs](https://readthedocs.org/projects/opencompass/badge/?version=dev-1.x)](https://opencompass.readthedocs.io/en/dev-1.x/?badge=dev-1.x)
[![license](https://img.shields.io/github/license/InternLM/opencompass.svg)](https://github.com/InternLM/opencompass/blob/main/LICENSE)
[![PyPI](https://badge.fury.io/py/opencompass.svg)](https://pypi.org/project/opencompass/)
[📘Documentation](https://opencompass.readthedocs.io/en/latest/) |
[🛠Installation](https://opencompass.readthedocs.io/en/latest/get_started/install.html) |
[🤔Reporting Issues](https://github.com/InternLM/opencompass/issues/new/choose)
[English](/README.md) | 简体中文
</div>
## Introduction
OpenCompass is a one-stop platform for large language model evaluation, aiming to provide a fair, open, and reproducible benchmarking framework. Its main features are:
- **Comprehensive support for models and datasets**: out-of-the-box support for 20+ HuggingFace and API models, plus an evaluation scheme covering 50+ datasets with roughly 300K questions, assessing capabilities across 6 dimensions.
- **Efficient distributed evaluation**: a single command handles task partitioning and distributed execution, so a full evaluation of a 100B-parameter model can finish within a few hours\*.
- **Diverse evaluation paradigms**: supports zero-shot, few-shot, and chain-of-thought evaluation, combined with standard or dialogue-style prompt templates, to easily elicit the best performance from all kinds of models.
- **Extensible modular design**: want to add a new model or dataset, customize a more advanced task-partitioning strategy, or even plug in a new cluster management system? Everything in OpenCompass can be extended with ease!
- **Complete experiment recording and reporting**: every experiment is fully captured by its config file so key information stays traceable; results can be reported to a Lark (Feishu) bot in real time so you learn the outcome immediately.
## Leaderboard
## Capability Dimensions & Supported Models
## Installation
The quick installation steps are shown below. Some third-party features may require extra steps to work; for details, please refer to the [installation guide](https://opencompass.readthedocs.io/zh_cn/latest/get_started.html).
```bash
conda create --name opencompass python=3.8 pytorch torchvision -c pytorch -y
conda activate opencompass
git clone https://github.com/InternLM/opencompass opencompass
cd opencompass
pip install -r requirements/runtime.txt
pip install -e .
# Download the datasets into data/
# TODO: ....
```
## Evaluation
Please read the [Quick Start](https://opencompass.readthedocs.io/zh_cn/latest/get_started.html) to learn how to run an evaluation task.
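A minimal sketch of what launching a task looks like (the `run.py` entry script and the demo config name are assumptions for illustration; the Quick Start guide gives the exact command):

```bash
# Launch the evaluation described by a config file (hypothetical demo config);
# the config declares which models and datasets to evaluate.
python run.py configs/eval_demo.py
```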
## Acknowledgements
Some of the code in this project is adapted from [OpenICL](https://github.com/Shark-NLP/OpenICL).
## Citation
```bibtex
@misc{2023opencompass,
title={OpenCompass: A Universal Evaluation Platform for Foundation Models},
author={OpenCompass Contributors},
howpublished = {\url{https://github.com/InternLM/OpenCompass}},
year={2023}
}
```

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .ARC_c_ppl_ba951c import ARC_c_datasets # noqa: F401, F403

View File

@ -0,0 +1,53 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import ARCDataset
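# A dataset config has three parts: reader_cfg maps dataset columns to prompt
# inputs and the gold answer; infer_cfg builds one prompt per candidate answer
# (A-D) and ranks them with PPLInferencer (lowest perplexity wins); eval_cfg
# scores the selected choice against 'answerKey' with AccEvaluator.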
ARC_c_reader_cfg = dict(
input_columns=['question', 'textA', 'textB', 'textC', 'textD'],
output_column='answerKey')
ARC_c_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
"A":
dict(
round=[
dict(role="HUMAN", prompt="Question: {question}\nAnswer: "),
dict(role="BOT", prompt="{textA}")
], ),
"B":
dict(
round=[
dict(role="HUMAN", prompt="Question: {question}\nAnswer: "),
dict(role="BOT", prompt="{textB}")
], ),
"C":
dict(
round=[
dict(role="HUMAN", prompt="Question: {question}\nAnswer: "),
dict(role="BOT", prompt="{textC}")
], ),
"D":
dict(
round=[
dict(role="HUMAN", prompt="Question: {question}\nAnswer: "),
dict(role="BOT", prompt="{textD}")
], ),
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
ARC_c_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
ARC_c_datasets = [
dict(
type=ARCDataset,
abbr='ARC-c',
path='./data/ARC/ARC-c/ARC-Challenge-Dev.jsonl',
reader_cfg=ARC_c_reader_cfg,
infer_cfg=ARC_c_infer_cfg,
eval_cfg=ARC_c_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .ARC_e_gen_0a29bf import ARC_e_datasets # noqa: F401, F403

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .CLUE_C3_ppl_588820 import C3_datasets # noqa: F401, F403

View File

@ -0,0 +1,36 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import C3Dataset
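# Perplexity-style multiple choice: each label index (0-3) gets a fully filled
# prompt string, PPLInferencer picks the completion the model finds most
# likely, and AccEvaluator compares that index against the 'label' column.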
C3_reader_cfg = dict(
input_columns=[
'question', 'content', 'choice0', 'choice1', 'choice2', 'choice3',
'choices'
],
output_column='label')
C3_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0: "文章:{content}\n问题:{question}\n答案:{choice0}",
1: "文章:{content}\n问题:{question}\n答案:{choice1}",
2: "文章:{content}\n问题:{question}\n答案:{choice2}",
3: "文章:{content}\n问题:{question}\n答案:{choice3}"
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
C3_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
C3_datasets = [
dict(
type=C3Dataset,
abbr='C3',
path='./data/CLUE/C3/dev_0.json',
reader_cfg=C3_reader_cfg,
infer_cfg=C3_infer_cfg,
eval_cfg=C3_eval_cfg)
]

View File

@ -0,0 +1,27 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator
from opencompass.datasets import DRCDDataset
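# Extractive QA evaluated generatively: GenInferencer lets the model answer
# freely, and EMEvaluator checks the generation against 'answers' by exact match.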
DRCD_reader_cfg = dict(
input_columns=['question', 'context'], output_column='answers')
DRCD_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template="文章:{context}\n根据上文,回答如下问题: {question}\n答:"),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
DRCD_eval_cfg = dict(evaluator=dict(type=EMEvaluator), )
DRCD_datasets = [
dict(
type=DRCDDataset,
abbr='DRCD_dev',
path='./data/CLUE/DRCD/dev.json',
reader_cfg=DRCD_reader_cfg,
infer_cfg=DRCD_infer_cfg,
eval_cfg=DRCD_eval_cfg),
]

View File

@ -0,0 +1,33 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator
from opencompass.datasets import DRCDDataset
DRCD_reader_cfg = dict(
input_columns=['question', 'context'], output_column='answers')
DRCD_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(role="HUMAN", prompt="文章:{context}\n根据上文,回答如下问题:{question}"),
dict(role="BOT", prompt="答:"),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
DRCD_eval_cfg = dict(
evaluator=dict(type=EMEvaluator),
pred_role="BOT",
)
DRCD_datasets = [
dict(
type=DRCDDataset,
abbr='DRCD_dev',
path='./data/CLUE/DRCD/dev.json',
reader_cfg=DRCD_reader_cfg,
infer_cfg=DRCD_infer_cfg,
eval_cfg=DRCD_eval_cfg),
]

View File

@ -0,0 +1,42 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import cmnliDataset_V2
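# Generation-style NLI: the model is asked to reply with an option letter; the
# 'first-capital' post-processor extracts the first capital letter from the
# generation before AccEvaluator compares it with the gold label.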
cmnli_reader_cfg = dict(
input_columns=["sentence1", "sentence2"],
output_column="label",
test_split="train")
cmnli_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
"阅读文章:{sentence1}\n根据上文,回答如下问题:{sentence2}\nA. 对\nB. 错\nC. 可能\n请从“A”“B”“C”中进行选择。\n答:"
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
cmnli_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
cmnli_datasets = [
dict(
abbr="cmnli",
type=cmnliDataset_V2,
path="./data/CLUE/cmnli/cmnli_public/dev.json",
reader_cfg=cmnli_reader_cfg,
infer_cfg=cmnli_infer_cfg,
eval_cfg=cmnli_eval_cfg,
)
]

View File

@ -0,0 +1,42 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import cmnliDataset_V2
cmnli_reader_cfg = dict(
input_columns=["sentence1", "sentence2"],
output_column="label",
test_split="train")
cmnli_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
"语句一:“{sentence1}\n语句二:“{sentence2}\n请问这两句话是什么关系?\nA. 蕴含\nB. 矛盾\nC. 无关\n请从“A”“B”“C”中进行选择。\n答:"
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
cmnli_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
cmnli_datasets = [
dict(
abbr="cmnli",
type=cmnliDataset_V2,
path="./data/CLUE/cmnli/cmnli_public/dev.json",
reader_cfg=cmnli_reader_cfg,
infer_cfg=cmnli_infer_cfg,
eval_cfg=cmnli_eval_cfg,
)
]

View File

@ -0,0 +1,56 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
cmnli_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'],
output_column='label',
test_split='train')
cmnli_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
'contradiction':
dict(round=[
dict(
role="HUMAN",
prompt="语句一:“{sentence1}\n语句二:“{sentence2}\n请问这两句话是什么关系?"
),
dict(role="BOT", prompt="矛盾")
]),
'entailment':
dict(round=[
dict(
role="HUMAN",
prompt="语句一:“{sentence1}\n语句二:“{sentence2}\n请问这两句话是什么关系?"
),
dict(role="BOT", prompt="蕴含")
]),
'neutral':
dict(round=[
dict(
role="HUMAN",
prompt="语句一:“{sentence1}\n语句二:“{sentence2}\n请问这两句话是什么关系?"
),
dict(role="BOT", prompt="无关")
]),
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
cmnli_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
cmnli_datasets = [
dict(
type=HFDataset,
abbr='cmnli',
path='json',
split='train',
data_files='./data/CLUE/cmnli/cmnli_public/dev.json',
reader_cfg=cmnli_reader_cfg,
infer_cfg=cmnli_infer_cfg,
eval_cfg=cmnli_eval_cfg)
]

View File

@ -0,0 +1,43 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import cmnliDataset_V2
ocnli_reader_cfg = dict(
input_columns=["sentence1", "sentence2"],
output_column="label",
)
# TODO: two prompt templates for ocnli
ocnli_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
"语句一:“{sentence1}\n语句二:“{sentence2}\n请问这两句话是什么关系?\nA. 蕴含\n B. 矛盾\n C. 无关\n请从“A”“B”“C”中进行选择。\n答:"
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
ocnli_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
ocnli_datasets = [
dict(
abbr="ocnli",
type=cmnliDataset_V2, # ocnli share the same format with cmnli
path="./data/CLUE/OCNLI/dev.json",
reader_cfg=ocnli_reader_cfg,
infer_cfg=ocnli_infer_cfg,
eval_cfg=ocnli_eval_cfg,
)
]

View File

@ -0,0 +1,51 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
ocnli_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'], output_column='label')
# TODO: two prompt templates for ocnli
ocnli_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
'contradiction':
dict(round=[
dict(
role="HUMAN",
prompt="阅读文章:{sentence1}\n根据上文,回答如下问题:{sentence2}"),
dict(role="BOT", prompt="")
]),
'entailment':
dict(round=[
dict(
role="HUMAN",
prompt="阅读文章:{sentence1}\n根据上文,回答如下问题:{sentence2}"),
dict(role="BOT", prompt="")
]),
'neutral':
dict(round=[
dict(
role="HUMAN", prompt="如果{sentence1}为真,那么{sentence2}也为真吗?"),
dict(role="BOT", prompt="可能")
]),
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
ocnli_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )
ocnli_datasets = [
dict(
type=HFDataset,
abbr='ocnli',
path='json',
split='train',
data_files='./data/CLUE/OCNLI/dev.json',
reader_cfg=ocnli_reader_cfg,
infer_cfg=ocnli_infer_cfg,
eval_cfg=ocnli_eval_cfg)
]

View File

@ -0,0 +1,55 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
ocnli_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'], output_column='label')
# TODO: two prompt templates for ocnli
ocnli_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
'contradiction':
dict(round=[
dict(
role="HUMAN",
prompt="语句一:“{sentence1}\n语句二:“{sentence2}\n请问这两句话是什么关系?"
),
dict(role="BOT", prompt="矛盾")
]),
'entailment':
dict(round=[
dict(
role="HUMAN",
prompt="语句一:“{sentence1}\n语句二:“{sentence2}\n请问这两句话是什么关系?"
),
dict(role="BOT", prompt="蕴含")
]),
'neutral':
dict(round=[
dict(
role="HUMAN",
prompt="语句一:“{sentence1}\n语句二:“{sentence2}\n请问这两句话是什么关系?"
),
dict(role="BOT", prompt="无关")
]),
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
ocnli_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )
ocnli_datasets = [
dict(
type=HFDataset,
abbr='ocnli',
path='json',
split='train',
data_files='./data/CLUE/OCNLI/dev.json',
reader_cfg=ocnli_reader_cfg,
infer_cfg=ocnli_infer_cfg,
eval_cfg=ocnli_eval_cfg)
]

View File

@ -0,0 +1,65 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
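# Each PPL template opens with a 'begin' section carrying a system-style
# instruction; fallback_role='HUMAN' delivers that instruction as a HUMAN turn
# when the model's meta template defines no SYSTEM role.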
bustm_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'],
output_column='label',
test_split='train')
bustm_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0:
dict(
begin=[
dict(
role='SYSTEM',
fallback_role='HUMAN',
prompt="请判断以下两句话说的是否是一个意思:")
],
round=[
dict(role="HUMAN", prompt="{sentence1}{sentence2}"),
dict(role="BOT", prompt="两句话说的毫不相关。")
]),
1:
dict(
begin=[
dict(
role='SYSTEM',
fallback_role='HUMAN',
prompt="请判断以下两句话说的是否是一个意思:")
],
round=[
dict(role="HUMAN", prompt="{sentence1}{sentence2}"),
dict(role="BOT", prompt="两句话说是的一个意思。")
]),
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
bustm_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
bustm_datasets = [
dict(
type=HFDataset,
abbr='bustm-dev',
path='json',
data_files='./data/FewCLUE/bustm/dev_few_all.json',
split='train',
reader_cfg=bustm_reader_cfg,
infer_cfg=bustm_infer_cfg,
eval_cfg=bustm_eval_cfg),
dict(
type=HFDataset,
abbr='bustm-test',
path='json',
data_files='./data/FewCLUE/bustm/test_public.json',
split='train',
reader_cfg=bustm_reader_cfg,
infer_cfg=bustm_infer_cfg,
eval_cfg=bustm_eval_cfg)
]

View File

@ -0,0 +1,59 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
bustm_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'],
output_column='label',
test_split='train')
bustm_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0:
dict(round=[
dict(
role="HUMAN",
prompt=
"语句一:“{sentence1}\n语句二:“{sentence2}\n请判断语句一和语句二说的是否是一个意思?"
),
dict(role="BOT", prompt="两句话说的毫不相关。")
]),
1:
dict(round=[
dict(
role="HUMAN",
prompt=
"语句一:“{sentence1}\n语句二:“{sentence2}\n请判断语句一和语句二说的是否是一个意思?"
),
dict(role="BOT", prompt="两句话说是的一个意思。")
]),
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
bustm_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
bustm_datasets = [
dict(
type=HFDataset,
abbr='bustm-dev',
path='json',
data_files='./data/FewCLUE/bustm/dev_few_all.json',
split='train',
reader_cfg=bustm_reader_cfg,
infer_cfg=bustm_infer_cfg,
eval_cfg=bustm_eval_cfg),
dict(
type=HFDataset,
abbr='bustm-test',
path='json',
data_files='./data/FewCLUE/bustm/test_public.json',
split='train',
reader_cfg=bustm_reader_cfg,
infer_cfg=bustm_infer_cfg,
eval_cfg=bustm_eval_cfg)
]

View File

@ -0,0 +1,50 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CHIDDataset_V2
chid_reader_cfg = dict(
input_columns=["content","A","B","C","D","E","F","G"],
output_column="answer",
)
chid_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(
role="HUMAN",
prompt=
"{content}\n请选择______处所填的词\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nE. {E}\nF. {F}\nG. {G}\n请从”A“”B“”C“”D“”E“”F“”G“中进行选择。答",
),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
chid_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
chid_datasets = [
dict(
abbr="chid-dev",
type=CHIDDataset_V2,
path="./data/FewCLUE/chid/dev_few_all.json",
reader_cfg=chid_reader_cfg,
infer_cfg=chid_infer_cfg,
eval_cfg=chid_eval_cfg,
),
dict(
abbr="chid-test",
type=CHIDDataset_V2,
path="./data/FewCLUE/chid/test_public.json",
reader_cfg=chid_reader_cfg,
infer_cfg=chid_infer_cfg,
eval_cfg=chid_eval_cfg,
),
]

View File

@ -0,0 +1,54 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CluewscDataset
cluewsc_reader_cfg = dict(
input_columns=['span1', 'span2', 'text', 'new_text'],
output_column='answer')
cluewsc_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0:
dict(round=[
dict(
role="HUMAN",
prompt="{text}\n此处,代词“{span2}“被用于指代“{span1}“吗?"),
dict(role="BOT", prompt="")
]),
1:
dict(round=[
dict(
role="HUMAN",
prompt="{text}\n此处,代词“{span2}“被用于指代“{span1}“吗?"),
dict(role="BOT", prompt="")
]),
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
cluewsc_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
cluewsc_datasets = [
dict(
type=CluewscDataset,
path='json',
abbr='cluewsc-dev',
data_files='./data/FewCLUE/cluewsc/dev_few_all.json',
split='train',
reader_cfg=cluewsc_reader_cfg,
infer_cfg=cluewsc_infer_cfg,
eval_cfg=cluewsc_eval_cfg),
dict(
type=CluewscDataset,
path='json',
abbr='cluewsc-test',
data_files='./data/FewCLUE/cluewsc/test_public.json',
split='train',
reader_cfg=cluewsc_reader_cfg,
infer_cfg=cluewsc_infer_cfg,
eval_cfg=cluewsc_eval_cfg),
]

View File

@ -0,0 +1,44 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CluewscDataset
cluewsc_reader_cfg = dict(
input_columns=['span1', 'span2', 'text', 'new_text'],
output_column='answer')
cluewsc_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0:
"{text}\nHere, is the pronoun \"{span2}\" used to mean \"{span1}\"? No.",
1:
"{text}\nHere, is the pronoun \"{span2}\" used to mean \"{span1}\"? Yes.",
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
cluewsc_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
cluewsc_datasets = [
dict(
type=CluewscDataset,
path='json',
abbr='cluewsc-dev',
data_files='./data/FewCLUE/cluewsc/dev_few_all.json',
split='train',
reader_cfg=cluewsc_reader_cfg,
infer_cfg=cluewsc_infer_cfg,
eval_cfg=cluewsc_eval_cfg),
dict(
type=CluewscDataset,
path='json',
abbr='cluewsc-test',
data_files='./data/FewCLUE/cluewsc/test_public.json',
split='train',
reader_cfg=cluewsc_reader_cfg,
infer_cfg=cluewsc_infer_cfg,
eval_cfg=cluewsc_eval_cfg),
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .FewCLUE_csl_ppl_8eee08 import csl_datasets # noqa: F401, F403

View File

@ -0,0 +1,45 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CslDataset
csl_reader_cfg = dict(
input_columns=["abst", "keywords"], output_column='label')
csl_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0:
dict(round=[dict(role="HUMAN", prompt="摘要:{abst}")]),
1:
dict(
round=[dict(role="HUMAN", prompt="摘要:{abst}\n关键词:{keywords}")
]),
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
csl_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
csl_datasets = [
dict(
type=CslDataset,
path='json',
abbr='csl_dev',
data_files='./data/FewCLUE/csl/dev_few_all.json',
split='train',
reader_cfg=csl_reader_cfg,
infer_cfg=csl_infer_cfg,
eval_cfg=csl_eval_cfg),
dict(
type=CslDataset,
path='json',
abbr='csl_test',
data_files='./data/FewCLUE/csl/test_public.json',
split='train',
reader_cfg=csl_reader_cfg,
infer_cfg=csl_infer_cfg,
eval_cfg=csl_eval_cfg)
]

View File

@ -0,0 +1,41 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
eprstmt_reader_cfg = dict(
input_columns=['sentence'], output_column='label', test_split='train')
eprstmt_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
'Negative': ' 内容: "{sentence}"。情绪分类:消极。',
'Positive': ' 内容: "{sentence}"。情绪分类:积极。',
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
eprstmt_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
eprstmt_datasets = [
dict(
type=HFDataset,
abbr='eprstmt-dev',
path='json',
data_files='./data/FewCLUE/eprstmt/dev_few_all.json',
split='train',
reader_cfg=eprstmt_reader_cfg,
infer_cfg=eprstmt_infer_cfg,
eval_cfg=eprstmt_eval_cfg),
dict(
type=HFDataset,
abbr='eprstmt-test',
path='json',
data_files='./data/FewCLUE/eprstmt/test_public.json',
split='train',
reader_cfg=eprstmt_reader_cfg,
infer_cfg=eprstmt_infer_cfg,
eval_cfg=eprstmt_eval_cfg)
]

View File

@ -0,0 +1,60 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
ocnli_fc_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'],
output_column='label',
test_split='train')
ocnli_fc_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
'contradiction':
dict(round=[
dict(
role="HUMAN",
prompt="阅读文章:{sentence1}\n根据上文,回答如下问题:{sentence2}"),
dict(role="BOT", prompt="")
]),
'entailment':
dict(round=[
dict(
role="HUMAN",
prompt="阅读文章:{sentence1}\n根据上文,回答如下问题:{sentence2}"),
dict(role="BOT", prompt="")
]),
'neutral':
dict(round=[
dict(
role="HUMAN", prompt="如果{sentence1}为真,那么{sentence2}也为真吗?"),
dict(role="BOT", prompt="可能")
]),
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
ocnli_fc_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
ocnli_fc_datasets = [
dict(
type=HFDataset,
abbr='ocnli_fc-dev',
path='json',
split='train',
data_files='./data/FewCLUE/ocnli/dev_few_all.json',
reader_cfg=ocnli_fc_reader_cfg,
infer_cfg=ocnli_fc_infer_cfg,
eval_cfg=ocnli_fc_eval_cfg),
dict(
type=HFDataset,
abbr='ocnli_fc-test',
path='json',
split='train',
data_files='./data/FewCLUE/ocnli/test_public.json',
reader_cfg=ocnli_fc_reader_cfg,
infer_cfg=ocnli_fc_infer_cfg,
eval_cfg=ocnli_fc_eval_cfg)
]

View File

@ -0,0 +1,44 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
ocnli_fc_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'],
output_column='label',
test_split='train')
ocnli_fc_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
'contradiction':
'阅读文章:{sentence1}\n根据上文,回答如下问题: {sentence2}\n答:错',
'entailment': '阅读文章:{sentence1}\n根据上文,回答如下问题: {sentence2}\n答:对',
'neutral': '如果{sentence1}为真,那么{sentence2}也为真吗?可能'
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
ocnli_fc_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
ocnli_fc_datasets = [
dict(
type=HFDataset,
abbr='ocnli_fc-dev',
path='json',
split='train',
data_files='./data/FewCLUE/ocnli/dev_few_all.json',
reader_cfg=ocnli_fc_reader_cfg,
infer_cfg=ocnli_fc_infer_cfg,
eval_cfg=ocnli_fc_eval_cfg),
dict(
type=HFDataset,
abbr='ocnli_fc-test',
path='json',
split='train',
data_files='./data/FewCLUE/ocnli/test_public.json',
reader_cfg=ocnli_fc_reader_cfg,
infer_cfg=ocnli_fc_infer_cfg,
eval_cfg=ocnli_fc_eval_cfg)
]

View File

@ -0,0 +1,74 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import TNewsDataset_V2
tnews_reader_cfg = dict(
input_columns="sentence",
output_column="label_desc2",
)
tnews_labels = [
"农业新闻", # news_agriculture
"旅游新闻", # news_travel
"游戏新闻", # news_game
"科技类别公司新闻", # news_tech
"体育类别新闻", # news_sports
"初升高教育新闻", # news_edu
"娱乐圈新闻", # news_entertainment
"投资资讯", # news_finance
"军事类别常识", # news_military
"车辆新闻", # news_car
"楼市新闻", # news_house
"环球不含中国类别新闻", # news_world
"书籍文化历史类别新闻", # news_culture
"故事类别新闻", # news_story
"股票市场类别新闻", # news_stock
]
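# The option block ("A. 农业新闻\nB. ...") and the letter range ("ABC...") are
# built from tnews_labels below, interpolated into the prompt via f-string, and
# deleted at the end of the file so they do not leak into the config namespace.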
_tnews_options_list_str = "\n".join(f'{chr(ord("A") + i)}. {tnews_labels[i]}'
for i in range(len(tnews_labels)))
_tnews_options_range_str = "".join(f'{chr(ord("A") + i)}'
for i in range(len(tnews_labels)))
tnews_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
f"{{sentence}}\n请判断上述内容属于什么新闻?\n{_tnews_options_list_str}\n请从{_tnews_options_range_str}中进行选择。\n答:",
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
tnews_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
tnews_datasets = [
dict(
abbr="tnews-dev",
type=TNewsDataset_V2,
path="./data/FewCLUE/tnews/dev_few_all.json",
reader_cfg=tnews_reader_cfg,
infer_cfg=tnews_infer_cfg,
eval_cfg=tnews_eval_cfg,
),
dict(
abbr="tnews-test",
type=TNewsDataset_V2,
path="./data/FewCLUE/tnews/test_public.json",
reader_cfg=tnews_reader_cfg,
infer_cfg=tnews_infer_cfg,
eval_cfg=tnews_eval_cfg,
),
]
del _tnews_options_list_str, _tnews_options_range_str

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .FewCLUE_tnews_ppl_784b9e import tnews_datasets # noqa: F401, F403

View File

@ -0,0 +1,301 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import GaokaoBenchDataset
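# GAOKAO-Bench: prompt prefixes are defined per question type below (MCQ,
# fill-in-the-blank, open-ended); the loop at the end of the file turns each
# (folder, prompt) pair into a GaokaoBenchDataset entry using GenInferencer.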
_MCQ_prompts = [
{
"type": "single_choice",
"keyword": "2010-2022_Math_II_MCQs",
"prefix_prompt":
"请你做一道数学选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:",
"comment": ""
},
{
"type": "single_choice",
"keyword": "2010-2022_Math_I_MCQs",
"prefix_prompt":
"请你做一道数学选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:",
"comment": ""
},
{
"type":
"single_choice",
"keyword":
"2010-2022_History_MCQs",
"prefix_prompt":
"请你做一道历史选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:"
},
{
"type":
"single_choice",
"keyword":
"2010-2022_Biology_MCQs",
"prefix_prompt":
"请你做一道生物选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:"
},
{
"type":
"single_choice",
"keyword":
"2010-2022_Political_Science_MCQs",
"prefix_prompt":
"请你做一道政治选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:"
},
{
"type":
"multi_choice",
"keyword":
"2010-2022_Physics_MCQs",
"prefix_prompt":
"请你做一道物理选择题。\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出所有符合题意的答案并写在【答案】和<eoa>之间。\n例如:【答案】 AB <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】... <eoa>\n请你严格按照上述格式作答。\n"
},
{
"type":
"single_choice",
"keyword":
"2010-2022_Chemistry_MCQs",
"prefix_prompt":
"请你做一道化学选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:"
},
{
"type":
"single_choice",
"keyword":
"2010-2013_English_MCQs",
"prefix_prompt":
"请你做一道英语选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:"
},
{
"type":
"multi_question_choice",
"keyword":
"2010-2022_Chinese_Modern_Lit",
"prefix_prompt":
"请你做一道语文阅读理解题,其中包含三个小题。\n请你一步一步思考。每一题你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如1【答案】 A <eoa>\n2【答案】 B <eoa>\n请你严格按照上述格式作答。\n"
},
{
"type":
"multi_question_choice",
"keyword":
"2010-2022_English_Fill_in_Blanks",
"prefix_prompt":
"请你做一道英语完形填空题,其中包含二十个小题。\n请你一步一步思考。每一题你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如1【答案】 A <eoa>\n2【答案】 B <eoa>\n请你严格按照上述格式作答。\n"
},
{
"type":
"five_out_of_seven",
"keyword":
"2012-2022_English_Cloze_Test",
"prefix_prompt":
"请回答下面的问题,将符合题意的五个选项的字母写在【答案】和<eoa>之间,例如“【答案】 A B C D E <eoa>\n请严格按照上述格式作答。\n"
},
{
"type":
"multi_question_choice",
"keyword":
"2010-2022_Geography_MCQs",
"prefix_prompt":
"请你做一道地理选择题,其中包含两到三个小题。\n请你一步一步思考。每一题你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如1【答案】 A <eoa>\n2【答案】 B <eoa>\n请你严格按照上述格式作答。\n"
},
{
"type":
"multi_question_choice",
"keyword":
"2010-2022_English_Reading_Comp",
"prefix_prompt":
"请你做一道英语阅读理解题,其中包含三到五个小题。\n请你一步一步思考。每一题你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如1【答案】 A <eoa>\n2【答案】 B <eoa>\n请你严格按照上述格式作答。\n"
},
{
"type":
"multi_question_choice",
"keyword":
"2010-2022_Chinese_Lang_and_Usage_MCQs",
"prefix_prompt":
"请你做一道语文选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n1【解析】 ... <eoe>\n【答案】 ... <eoa>\n2【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。如果不止一道题,请分别作答\n题目如下:"
},
]
_FBQ_prompts = [{
"type": "cloze",
"keyword": "2010-2022_Math_I_Fill-in-the-Blank",
"prefix_prompt":
"请解答下面的数学填空题\n仔细阅读题目,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。\n题目如下:",
"comment": ""
}, {
"type": "cloze",
"keyword": "2010-2022_Math_II_Fill-in-the-Blank",
"prefix_prompt":
"请解答下面的数学填空题\n仔细阅读题目,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。\n题目如下:",
"comment": ""
}, {
"type": "cloze",
"keyword":
"2010-2022_Chinese_Language_Famous_Passages_and_Sentences_Dictation",
"prefix_prompt":
"请回答下面的语文填空题\n请你仔细阅读题目,先找到题目对应的中国名篇,再从名篇中找到合适的句子填写到题目的空白处。请你将思考过程写在【解析】和<eoe>之间,将最终答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n1【解析】 ...<eoe>\n【答案】...<eoa>\n2【解析】 ...<eoe>\n【答案】...<eoa>\n请严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
}, {
"type": "cloze",
"keyword": "2014-2022_English_Language_Cloze_Passage",
"prefix_prompt":
"请回答下面的英语短文填词题\n仔细阅读题目,空白处请填入一个适当单词或者括号内单词的正确形式。请你一步步思考,将思考过程写在【解析】和<eoe>之间,将最终答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n1【解析】 ...<eoe>\n【答案】...<eoa>\n2【解析】 ...<eoe>\n【答案】...<eoa>\n请严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
}]
_OEQ_prompts = [
{
"type": "subjective",
"keyword": "2010-2022_Geography_Open-ended_Questions",
"prefix_prompt":
"请解答下面的地理解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。你的答案请写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chemistry_Open-ended_Questions",
"prefix_prompt":
"请解答下面的化学解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Math_I_Open-ended_Questions",
"prefix_prompt":
"请解答下面的数学解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间,答案需要有完整的解题步骤。\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_History_Open-ended_Questions",
"prefix_prompt":
"请解答下面的历史解答题\n仔细阅读材料和题目,并充分结合你已有的知识,解答其中的问题。请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Biology_Open-ended_Questions",
"prefix_prompt":
"请解答下面的生物解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间,同一小题的答案用\t分隔开。\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...\t...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...\t...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Math_II_Open-ended_Questions",
"prefix_prompt":
"请解答下面的数学解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间,答案需要有完整的解题步骤。\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Physics_Open-ended_Questions",
"prefix_prompt":
"请解答下面的物理解答题,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。\n完整的题目回答格式如下1【解析】 ...<eoe>\n【答案】 ...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Political_Science_Open-ended_Questions",
"prefix_prompt":
"请解答下面的政治解答题\n仔细阅读材料和题目,并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "correction",
"keyword": "2012-2022_English_Language_Error_Correction",
"prefix_prompt":
"请解答下面的英语短文改错题仔细阅读题目并充分结合你你已有的知识找出其中10处需要改动的地方。请你一步步思考把修改后的短文写在【答案】和<eoa>之间。\n完整的题目回答格式如下:【答案】 ...<eoa>\n 请你严格按照上述格式作答。\n题目如下:",
# "prefix_prompt": [
# "请解答下面的英语短文改错题仔细阅读题目并充分结合你你已有的知识找出其中10处需要改动的地方。请你一步步思考把修改后的短文写在【答案】和<eoa>之间。\n完整的题目回答格式如下【答案】 ...<eoa>\n 请你严格按照上述格式作答。\n题目如下:",
# "请比较下面两篇短文找到第二篇和第一篇的10处不同每处不同只涉及一个单词请将结果写在【答案】和<eoa>之间。例如【答案】1. 将play改为plays\n 2.增加了the\n ... <eoa>\n 完整的题目回答格式如下:【答案】(1) ... \n (2) ...\n ...(10) ...\n<eoa>\n请你严格按照上述格式作答。\n短文如下:"
# ],
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chinese_Language_Ancient_Poetry_Reading",
"prefix_prompt":
"请解答下面的语文古代诗歌阅读题,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。\n完整的题目回答格式如下1【答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chinese_Language_Practical_Text_Reading",
"prefix_prompt":
"请解答下面的语文实用类文本阅读,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。\n完整的题目回答格式如下1[答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chinese_Language_Literary_Text_Reading",
"prefix_prompt":
"请解答下面的语文文学类文本阅读,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。\n完整的题目回答格式如下1[答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chinese_Language_Classical_Chinese_Reading",
"prefix_prompt":
"请解答下面的语文文言文阅读,仔细阅读题目,前三题是单选题,最后一题要将文言文翻译为现代汉语。请你一步步思考并把最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。翻译题把翻译后的现代汉语句子写在【答案】后面,例如”【答案】今天天气很好 <eoa>”\n完整的题目回答格式如下1[答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword":
"2010-2022_Chinese_Language_Language_and_Writing_Skills_Open-ended_Questions",
"prefix_prompt":
"请解答下面的语文解答题,仔细阅读题目,注意其中可能含有选择题。请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。\n完整的题目回答格式如下1【解析】 ...<eoe>\n【答案】 ...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
}
]
GaokaoBench_datasets = []
for _folder, _prompts in [
("Multiple-choice_Questions", _MCQ_prompts),
("Fill-in-the-blank_Questions", _FBQ_prompts),
("Open-ended_Questions", _OEQ_prompts),
]:
for _p in _prompts:
_reader_cfg = {
"input_columns": ['question'],
"output_column": 'answer',
}
_infer_cfg = {
"ice_template": {
"type": PromptTemplate,
"template": {
"round": [{
"role": "HUMAN",
"prompt": _p['prefix_prompt'] + '{question}'
}]
},
"ice_token": "</E>"
},
"retriever": {
"type": ZeroRetriever
},
"inferencer": {
"type": GenInferencer,
"max_out_len": 1024,
}
}
_eval_cfg = {
"evaluator": {
"type": "GaokaoBenchEvaluator" + "_" + _p['type'],
},
"pred_role": "BOT",
}
_base_path = './data/GAOKAO-BENCH/data'
_dataset = {
"type": GaokaoBenchDataset,
"abbr": "GaokaoBench_" + _p['keyword'],
"path": _base_path + '/' + _folder + '/' + _p['keyword'] + ".json",
"reader_cfg": _reader_cfg,
"infer_cfg": _infer_cfg,
"eval_cfg": _eval_cfg,
}
GaokaoBench_datasets.append(_dataset)
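# Clean-up: drop every module-level helper prefixed with '_' so that only
# GaokaoBench_datasets is exported from this config.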
_temporary_variables = [k for k in globals() if k.startswith('_')]
for _t in _temporary_variables:
del globals()[_t]
del _temporary_variables, _t

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .GaokaoBench_mixed_f2038e import GaokaoBench_datasets # noqa: F401, F403

View File

@ -0,0 +1,353 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer, PPLInferencer
from opencompass.datasets import GaokaoBenchDataset
_MCQ_prompts = [
{
"type": "single_choice",
"keyword": "2010-2022_Math_II_MCQs",
"prefix_prompt":
"请你做一道数学选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:",
"comment": ""
},
{
"type": "single_choice",
"keyword": "2010-2022_Math_I_MCQs",
"prefix_prompt":
"请你做一道数学选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:",
"comment": ""
},
{
"type":
"single_choice",
"keyword":
"2010-2022_History_MCQs",
"prefix_prompt":
"请你做一道历史选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:"
},
{
"type":
"single_choice",
"keyword":
"2010-2022_Biology_MCQs",
"prefix_prompt":
"请你做一道生物选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:"
},
{
"type":
"single_choice",
"keyword":
"2010-2022_Political_Science_MCQs",
"prefix_prompt":
"请你做一道政治选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:"
},
{
"type":
"multi_choice",
"keyword":
"2010-2022_Physics_MCQs",
"prefix_prompt":
"请你做一道物理选择题。\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出所有符合题意的答案并写在【答案】和<eoa>之间。\n例如:【答案】 AB <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】... <eoa>\n请你严格按照上述格式作答。\n"
},
{
"type":
"single_choice",
"keyword":
"2010-2022_Chemistry_MCQs",
"prefix_prompt":
"请你做一道化学选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:"
},
{
"type":
"single_choice",
"keyword":
"2010-2013_English_MCQs",
"prefix_prompt":
"请你做一道英语选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:"
},
{
"type":
"multi_question_choice",
"keyword":
"2010-2022_Chinese_Modern_Lit",
"prefix_prompt":
"请你做一道语文阅读理解题,其中包含三个小题。\n请你一步一步思考。每一题你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如1【答案】 A <eoa>\n2【答案】 B <eoa>\n请你严格按照上述格式作答。\n"
},
{
"type":
"multi_question_choice",
"keyword":
"2010-2022_English_Fill_in_Blanks",
"prefix_prompt":
"请你做一道英语完形填空题,其中包含二十个小题。\n请你一步一步思考。每一题你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如1【答案】 A <eoa>\n2【答案】 B <eoa>\n请你严格按照上述格式作答。\n"
},
{
"type":
"five_out_of_seven",
"keyword":
"2012-2022_English_Cloze_Test",
"prefix_prompt":
"请回答下面的问题,将符合题意的五个选项的字母写在【答案】和<eoa>之间,例如“【答案】 A B C D E <eoa>\n请严格按照上述格式作答。\n"
},
{
"type":
"multi_question_choice",
"keyword":
"2010-2022_Geography_MCQs",
"prefix_prompt":
"请你做一道地理选择题,其中包含两到三个小题。\n请你一步一步思考。每一题你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如1【答案】 A <eoa>\n2【答案】 B <eoa>\n请你严格按照上述格式作答。\n"
},
{
"type":
"multi_question_choice",
"keyword":
"2010-2022_English_Reading_Comp",
"prefix_prompt":
"请你做一道英语阅读理解题,其中包含三到五个小题。\n请你一步一步思考。每一题你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如1【答案】 A <eoa>\n2【答案】 B <eoa>\n请你严格按照上述格式作答。\n"
},
{
"type":
"multi_question_choice",
"keyword":
"2010-2022_Chinese_Lang_and_Usage_MCQs",
"prefix_prompt":
"请你做一道语文选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n1【解析】 ... <eoe>\n【答案】 ... <eoa>\n2【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。如果不止一道题,请分别作答\n题目如下:"
},
]
_FBQ_prompts = [{
"type": "cloze",
"keyword": "2010-2022_Math_I_Fill-in-the-Blank",
"prefix_prompt":
"请解答下面的数学填空题\n仔细阅读题目,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。\n题目如下:",
"comment": ""
}, {
"type": "cloze",
"keyword": "2010-2022_Math_II_Fill-in-the-Blank",
"prefix_prompt":
"请解答下面的数学填空题\n仔细阅读题目,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。\n题目如下:",
"comment": ""
}, {
"type": "cloze",
"keyword":
"2010-2022_Chinese_Language_Famous_Passages_and_Sentences_Dictation",
"prefix_prompt":
"请回答下面的语文填空题\n请你仔细阅读题目,先找到题目对应的中国名篇,再从名篇中找到合适的句子填写到题目的空白处。请你将思考过程写在【解析】和<eoe>之间,将最终答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n1【解析】 ...<eoe>\n【答案】...<eoa>\n2【解析】 ...<eoe>\n【答案】...<eoa>\n请严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
}, {
"type": "cloze",
"keyword": "2014-2022_English_Language_Cloze_Passage",
"prefix_prompt":
"请回答下面的英语短文填词题\n仔细阅读题目,空白处请填入一个适当单词或者括号内单词的正确形式。请你一步步思考,将思考过程写在【解析】和<eoe>之间,将最终答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n1【解析】 ...<eoe>\n【答案】...<eoa>\n2【解析】 ...<eoe>\n【答案】...<eoa>\n请严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
}]
_OEQ_prompts = [
{
"type": "subjective",
"keyword": "2010-2022_Geography_Open-ended_Questions",
"prefix_prompt":
"请解答下面的地理解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。你的答案请写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chemistry_Open-ended_Questions",
"prefix_prompt":
"请解答下面的化学解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Math_I_Open-ended_Questions",
"prefix_prompt":
"请解答下面的数学解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间,答案需要有完整的解题步骤。\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_History_Open-ended_Questions",
"prefix_prompt":
"请解答下面的历史解答题\n仔细阅读材料和题目,并充分结合你已有的知识,解答其中的问题。请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Biology_Open-ended_Questions",
"prefix_prompt":
"请解答下面的生物解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间,同一小题的答案用\t分隔开。\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...\t...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...\t...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Math_II_Open-ended_Questions",
"prefix_prompt":
"请解答下面的数学解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间,答案需要有完整的解题步骤。\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Physics_Open-ended_Questions",
"prefix_prompt":
"请解答下面的物理解答题,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。\n完整的题目回答格式如下1【解析】 ...<eoe>\n【答案】 ...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Political_Science_Open-ended_Questions",
"prefix_prompt":
"请解答下面的政治解答题\n仔细阅读材料和题目,并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "correction",
"keyword": "2012-2022_English_Language_Error_Correction",
"prefix_prompt":
"请解答下面的英语短文改错题仔细阅读题目并充分结合你你已有的知识找出其中10处需要改动的地方。请你一步步思考把修改后的短文写在【答案】和<eoa>之间。\n完整的题目回答格式如下:【答案】 ...<eoa>\n 请你严格按照上述格式作答。\n题目如下:",
# "prefix_prompt": [
# "请解答下面的英语短文改错题仔细阅读题目并充分结合你你已有的知识找出其中10处需要改动的地方。请你一步步思考把修改后的短文写在【答案】和<eoa>之间。\n完整的题目回答格式如下【答案】 ...<eoa>\n 请你严格按照上述格式作答。\n题目如下:",
# "请比较下面两篇短文找到第二篇和第一篇的10处不同每处不同只涉及一个单词请将结果写在【答案】和<eoa>之间。例如【答案】1. 将play改为plays\n 2.增加了the\n ... <eoa>\n 完整的题目回答格式如下:【答案】(1) ... \n (2) ...\n ...(10) ...\n<eoa>\n请你严格按照上述格式作答。\n短文如下:"
# ],
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chinese_Language_Ancient_Poetry_Reading",
"prefix_prompt":
"请解答下面的语文古代诗歌阅读题,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。\n完整的题目回答格式如下1【答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chinese_Language_Practical_Text_Reading",
"prefix_prompt":
"请解答下面的语文实用类文本阅读,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。\n完整的题目回答格式如下1[答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chinese_Language_Literary_Text_Reading",
"prefix_prompt":
"请解答下面的语文文学类文本阅读,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。\n完整的题目回答格式如下1[答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chinese_Language_Classical_Chinese_Reading",
"prefix_prompt":
"请解答下面的语文文言文阅读,仔细阅读题目,前三题是单选题,最后一题要将文言文翻译为现代汉语。请你一步步思考并把最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。翻译题把翻译后的现代汉语句子写在【答案】后面,例如”【答案】今天天气很好 <eoa>”\n完整的题目回答格式如下1[答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword":
"2010-2022_Chinese_Language_Language_and_Writing_Skills_Open-ended_Questions",
"prefix_prompt":
"请解答下面的语文解答题,仔细阅读题目,注意其中可能含有选择题。请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。\n完整的题目回答格式如下1【解析】 ...<eoe>\n【答案】 ...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
}
]
GaokaoBench_datasets = []
for _folder, _prompts in [
("Multiple-choice_Questions", _MCQ_prompts),
("Fill-in-the-blank_Questions", _FBQ_prompts),
("Open-ended_Questions", _OEQ_prompts),
]:
for _p in _prompts:
if _p['type'] == "single_choice":
continue
_reader_cfg = {
"input_columns": ['question'],
"output_column": 'answer',
}
_infer_cfg = {
"ice_template": {
"type": PromptTemplate,
"template": {
"round": [{
"role": "HUMAN",
"prompt": _p['prefix_prompt'] + '{question}'
}]
},
"ice_token": "</E>"
},
"retriever": {
"type": ZeroRetriever
},
"inferencer": {
"type": GenInferencer,
"max_out_len": 1024,
}
}
_eval_cfg = {
"evaluator": {
"type": "GaokaoBenchEvaluator" + "_" + _p['type'],
},
"pred_role": "BOT",
}
_base_path = './data/GAOKAO-BENCH/data'
_dataset = {
"type": GaokaoBenchDataset,
"abbr": "GaokaoBench_" + _p['keyword'],
"path": _base_path + '/' + _folder + '/' + _p['keyword'] + ".json",
"reader_cfg": _reader_cfg,
"infer_cfg": _infer_cfg,
"eval_cfg": _eval_cfg,
}
GaokaoBench_datasets.append(_dataset)
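# single_choice MCQs (skipped above) are evaluated by perplexity instead of
# generation: each option A-D gets a template ending with a BOT turn
# "【答案】X <eoa>", and PPLInferencer selects the lowest-perplexity option.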
_folder = "Multiple-choice_Questions"
for _p in _MCQ_prompts:
if _p['type'] != "single_choice":
continue
_reader_cfg = {
"input_columns": ['question'],
"output_column": 'answer',
}
_infer_cfg = {
"ice_template": {
"type": PromptTemplate,
"template": {
answer: {
"round": [{
"role": "HUMAN",
"prompt": _p['prefix_prompt'] + '{question}'
}, {
"role": "BOT",
"prompt": f"【答案】{answer} <eoa>"
}]
}
for answer in ['A', 'B', 'C', 'D']
},
"ice_token": "</E>"
},
"retriever": {
"type": ZeroRetriever
},
"inferencer": {
"type": PPLInferencer
}
}
_eval_cfg = {
"evaluator": {
"type": "GaokaoBenchEvaluator" + "_" + _p['type'],
},
"pred_role": "BOT",
}
_base_path = './data/GAOKAO-BENCH/data'
_dataset = {
"type": GaokaoBenchDataset,
"abbr": "GaokaoBench_" + _p['keyword'],
"path": _base_path + '/' + _folder + '/' + _p['keyword'] + ".json",
"reader_cfg": _reader_cfg,
"infer_cfg": _infer_cfg,
"eval_cfg": _eval_cfg,
}
GaokaoBench_datasets.append(_dataset)
_temporary_variables = [k for k in globals() if k.startswith('_')]
for _t in _temporary_variables:
del globals()[_t]
del _temporary_variables, _t

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .PJExam_gen_785c37 import PJExam_datasets # noqa: F401, F403

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .SuperGLUE_AX_g_gen_7a5dee import AX_g_datasets # noqa: F401, F403

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .SuperGLUE_BoolQ_gen_8525d1 import BoolQ_datasets # noqa: F401, F403

View File

@ -0,0 +1,45 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import BoolQDataset
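# Yes/no QA via perplexity: label 0 is scored with a "No." completion and
# label 1 with "Yes."; the more likely continuation is taken as the prediction.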
BoolQ_reader_cfg = dict(
input_columns=["question", "passage"],
output_column="answer",
test_split="train")
BoolQ_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0:
dict(round=[
dict(role="HUMAN", prompt="{passage}\nQuestion: {question}"),
dict(role="BOT", prompt="No."),
]),
1:
dict(round=[
dict(role="HUMAN", prompt="{passage}\nQuestion: {question}"),
dict(role="BOT", prompt="Yes."),
]),
},
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer),
)
BoolQ_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
BoolQ_datasets = [
dict(
type=BoolQDataset,
abbr="BoolQ",
path="json",
data_files="./data/SuperGLUE/BoolQ/val.jsonl",
split="train",
reader_cfg=BoolQ_reader_cfg,
infer_cfg=BoolQ_infer_cfg,
eval_cfg=BoolQ_eval_cfg,
)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .SuperGLUE_CB_ppl_32adbb import CB_datasets # noqa: F401, F403

View File

@ -0,0 +1,62 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
CB_reader_cfg = dict(
input_columns=["premise", "hypothesis"],
output_column="label",
)
CB_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
"contradiction":
dict(round=[
dict(
role="HUMAN",
prompt=
"{premise}\n{hypothesis}\nWhat is the relation between the two sentences?"
),
dict(role="BOT", prompt="Contradiction"),
]),
"entailment":
dict(round=[
dict(
role="HUMAN",
prompt=
"{premise}\n{hypothesis}\nWhat is the relation between the two sentences?"
),
dict(role="BOT", prompt="Entailment"),
]),
"neutral":
dict(round=[
dict(
role="HUMAN",
prompt=
"{premise}\n{hypothesis}\nWhat is the relation between the two sentences?"
),
dict(role="BOT", prompt="Neutral"),
]),
},
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer),
)
CB_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )
CB_datasets = [
dict(
type=HFDataset,
abbr="CB",
path="json",
split="train",
data_files="./data/SuperGLUE/CB/val.jsonl",
reader_cfg=CB_reader_cfg,
infer_cfg=CB_infer_cfg,
eval_cfg=CB_eval_cfg,
)
]

View File

@ -0,0 +1,49 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
COPA_reader_cfg = dict(
input_columns=["question", "premise", "choice1", "choice2"],
output_column="label",
test_split="train")
COPA_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0:
dict(round=[
dict(
role="HUMAN",
prompt="{premise}\nQuestion: What may be the {question}?\nAnswer:"),
dict(role="BOT", prompt="{choice1}"),
]),
1:
dict(round=[
dict(
role="HUMAN",
prompt="{premise}\nQuestion: What may be the {question}?\nAnswer:"),
dict(role="BOT", prompt="{choice2}"),
]),
},
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer),
)
COPA_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
COPA_datasets = [
dict(
type=HFDataset,
abbr="COPA",
path="json",
data_files="./data/SuperGLUE/COPA/val.jsonl",
split="train",
reader_cfg=COPA_reader_cfg,
infer_cfg=COPA_infer_cfg,
eval_cfg=COPA_eval_cfg,
)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .SuperGLUE_MultiRC_ppl_83a304 import MultiRC_datasets # noqa: F401, F403

View File

@ -0,0 +1,42 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import AXDataset_V2
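# RTE reuses the AX dataset loader (same JSONL format); the model answers with
# A/B and the first capital letter is extracted before accuracy is computed.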
RTE_reader_cfg = dict(
input_columns=["hypothesis", "premise"],
output_column="label",
)
RTE_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
"{premise}\n{hypothesis}\nIs the sentence below entailed by the sentence above?\nA. Yes\nB. No\nAnswer:"
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
RTE_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
RTE_datasets = [
dict(
abbr="RTE",
type=AXDataset_V2, # rte share the same format with ax
path="./data/SuperGLUE/RTE/val.jsonl",
reader_cfg=RTE_reader_cfg,
infer_cfg=RTE_infer_cfg,
eval_cfg=RTE_eval_cfg,
)
]

View File

@ -0,0 +1,53 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
RTE_reader_cfg = dict(
input_columns=["hypothesis", "premise"],
output_column="label",
test_split="train")
RTE_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
"entailment":
dict(round=[
dict(
role="HUMAN",
prompt=
"{premise}\n{hypothesis}\nIs the sentence below entailed by the sentence above?"
),
dict(role="BOT", prompt="Yes"),
]),
"not_entailment":
dict(round=[
dict(
role="HUMAN",
prompt=
"{premise}\n{hypothesis}\nIs the sentence below entailed by the sentence above?"
),
dict(role="BOT", prompt="No"),
])
},
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer),
)
RTE_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
RTE_datasets = [
dict(
type=HFDataset,
abbr="RTE",
path="json",
data_files="./data/SuperGLUE/RTE/val.jsonl",
split="train",
reader_cfg=RTE_reader_cfg,
infer_cfg=RTE_infer_cfg,
eval_cfg=RTE_eval_cfg,
)
]

View File

@ -0,0 +1,29 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator
from opencompass.datasets import ReCoRDDataset
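# ReCoRD, zero-shot generative setup: the model is asked to name the entity
# behind the "____" placeholder in the query; EMEvaluator computes exact
# match after the ReCoRD postprocessor cleans the prediction.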
ReCoRD_reader_cfg = dict(
input_columns=['question', 'text'], output_column='answers')
ReCoRD_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=
"Passage:{text}\nResult:{question}\nQuestion: What entity does ____ refer to in the result?Give me the entity name:"),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
ReCoRD_eval_cfg = dict(
evaluator=dict(type=EMEvaluator), pred_postprocessor=dict(type='ReCoRD'))
ReCoRD_datasets = [
dict(
type=ReCoRDDataset,
abbr='ReCoRD',
path='./data/SuperGLUE/ReCoRD/val.jsonl',
reader_cfg=ReCoRD_reader_cfg,
infer_cfg=ReCoRD_infer_cfg,
eval_cfg=ReCoRD_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .SuperGLUE_WSC_ppl_85f45f import WSC_datasets # noqa: F401, F403

View File

@ -0,0 +1,34 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import WSCDataset
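# WSC, zero-shot PPL setup: the original sentence and the rewritten variant
# ('new_text', presumably with the candidate span substituted for the
# pronoun) are scored as two plain-string templates, and the lower-perplexity
# variant decides the coreference answer.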
WSC_reader_cfg = dict(
input_columns=['span1', 'span2', 'text', 'new_text'],
output_column='answer')
WSC_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0: "{text}",
1: "{new_text}"
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
WSC_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
WSC_datasets = [
dict(
type=WSCDataset,
path='json',
abbr='WSC',
data_files='./data/SuperGLUE/WSC/val.jsonl',
split='train',
reader_cfg=WSC_reader_cfg,
infer_cfg=WSC_infer_cfg,
eval_cfg=WSC_eval_cfg,
)
]

View File

@ -0,0 +1,41 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import WSCDataset
WSC_reader_cfg = dict(
input_columns=["span1", "span2", "text", "new_text"],
output_column="answer",
)
WSC_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0: dict(round=[
dict(role="HUMAN", prompt="{text}"),
]),
1: dict(round=[
dict(role="HUMAN", prompt="{new_text}"),
]),
},
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer),
)
WSC_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
WSC_datasets = [
dict(
type=WSCDataset,
path="json",
abbr="WSC",
data_files="./data/SuperGLUE/WSC/val.jsonl",
split="train",
reader_cfg=WSC_reader_cfg,
infer_cfg=WSC_infer_cfg,
eval_cfg=WSC_eval_cfg,
)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .TheoremQA_gen_891fcf import TheoremQA_datasets # noqa: F401, F403

View File

@ -0,0 +1,37 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import TheoremQADataset
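# TheoremQA, zero-shot generative setup: the answer-format constraints and
# the question are sent together as a single HUMAN turn; the TheoremQA
# postprocessor extracts the final answer before accuracy scoring.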
TheoremQA_reader_cfg = dict(
input_columns=['Question', 'Answer_type'],
output_column='Answer',
train_split='test')
TheoremQA_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role='HUMAN',
prompt=
"""You are a mathematician, you are supposed to answer the given question. You need to output the answer in your final sentence like "Therefore, the answer is ...". The answer can only be one of the following forms:\n1. a numerical value like 0.1, no symbol and no unit at all.\n2. a list of number like [2, 3, 4].\n3. True/False.\n4. an option like (a), (b), (c), (d)\nQuestion: {Question}\nLet\'s think step by step."""
),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
TheoremQA_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='TheoremQA'))
TheoremQA_datasets = [
dict(
abbr='TheoremQA',
type=TheoremQADataset,
path="./data/TheoremQA/test.csv",
reader_cfg=TheoremQA_reader_cfg,
infer_cfg=TheoremQA_infer_cfg,
eval_cfg=TheoremQA_eval_cfg)
]

View File

@ -0,0 +1,48 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import TheoremQADataset
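# TheoremQA, zero-shot generative setup (system-prompt variant): the
# answer-format instruction is placed in a SYSTEM turn (falling back to
# HUMAN for models without a system role), and only the question itself goes
# into the conversation round.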
TheoremQA_reader_cfg = dict(
input_columns=['Question', 'Answer_type'],
output_column='Answer',
train_split='test')
TheoremQA_prompt1 = """You are a mathematician, you are supposed to answer the given question. You need to output the answer in your final sentence like "Therefore, the answer is ...". The answer can only be one of the following forms:
1. a numerical value like 0.1, no symbol and no unit at all.
2. a list of number like [2, 3, 4].
3. True/False.
4. an option like (a), (b), (c), (d)
"""
TheoremQA_prompt2 = 'Question: {Question}\nLet\'s think step by step.'
TheoremQA_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin=[
dict(
role='SYSTEM',
fallback_role='HUMAN',
prompt=TheoremQA_prompt1),
],
round=[
dict(role='HUMAN', prompt=TheoremQA_prompt2),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
TheoremQA_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='TheoremQA'))
TheoremQA_datasets = [
dict(
abbr='TheoremQA',
type=TheoremQADataset,
path="./data/TheoremQA/test.csv",
reader_cfg=TheoremQA_reader_cfg,
infer_cfg=TheoremQA_infer_cfg,
eval_cfg=TheoremQA_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .Xsum_gen_d2126e import Xsum_datasets # noqa: F401, F403

View File

@ -0,0 +1,30 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import RougeEvaluator
from opencompass.datasets import XsumDataset
Xsum_reader_cfg = dict(input_columns=['dialogue'], output_column='summary')
Xsum_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
        template='Document: {dialogue}\n'
'Based on the previous text, provide a brief single summary:'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
Xsum_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
pred_postprocessor=dict(type='Xsum'),
)
Xsum_datasets = [
dict(
type=XsumDataset,
abbr='Xsum',
path='./data/Xsum/dev.jsonl',
reader_cfg=Xsum_reader_cfg,
infer_cfg=Xsum_infer_cfg,
eval_cfg=Xsum_eval_cfg)
]

View File

@ -0,0 +1,39 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import RougeEvaluator
from opencompass.datasets import XsumDataset
Xsum_reader_cfg = dict(input_columns=["dialogue"], output_column="summary")
Xsum_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
"Document{dialogue}\nBased on the previous text, provide a brief single summary:"
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
Xsum_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
pred_role='BOT',
pred_postprocessor=dict(type="Xsum"),
)
Xsum_datasets = [
dict(
type=XsumDataset,
abbr="Xsum",
path="./data/Xsum/dev.jsonl",
reader_cfg=Xsum_reader_cfg,
infer_cfg=Xsum_infer_cfg,
eval_cfg=Xsum_eval_cfg,
)
]

View File

@ -0,0 +1,84 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import AGIEvalDataset, AGIEvalEvaluator
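# AGIEval (v1 dataset class), zero-shot generative setup: single-choice sets
# are scored by accuracy after first-capital postprocessing, cloze sets use
# the dedicated AGIEvalEvaluator, and the multiple-choice sets are disabled
# pending extra data preprocessing.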
agieval_reader_cfg = dict(
input_columns=['problem_input'], output_column='label')
agieval_single_choice_sets = [
'gaokao-chinese',
'gaokao-english',
'gaokao-geography',
'gaokao-history',
'gaokao-biology',
'gaokao-chemistry',
'gaokao-physics',
'gaokao-mathqa',
'logiqa-zh',
'lsat-ar',
'lsat-lr',
'lsat-rc',
'logiqa-en',
'sat-math',
'sat-en',
'sat-en-without-passage',
'aqua-rat',
]
agieval_multiple_choices_sets = [
    # 'jec-qa-kd',  # requires extra data preprocessing
    # 'jec-qa-ca',  # requires extra data preprocessing
]
agieval_cloze_sets = ['gaokao-mathcloze', 'math']
agieval_datasets = []
for name in agieval_single_choice_sets:
agieval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[dict(role='HUMAN', prompt='{problem_input}')])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, labels=['A', 'B', 'C', 'D']))
agieval_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='first-capital'))
agieval_datasets.append(
dict(
type=AGIEvalDataset,
path='./data/AGIEval/data/v1/',
name=name,
abbr='agieval-' + name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for name in agieval_cloze_sets:
agieval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[dict(role="HUMAN", prompt='{problem_input}')])),
retriever=dict(type=ZeroRetriever
                   ),  # the retriever has no effect here; setting_name (zero-shot / few-shot) decides the format
inferencer=dict(type=GenInferencer))
agieval_eval_cfg = dict(
evaluator=dict(type=AGIEvalEvaluator), pred_role="BOT")
agieval_datasets.append(
dict(
type=AGIEvalDataset,
path='./data/AGIEval/data/v1/',
name=name,
abbr='agieval-' + name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
del name, agieval_infer_cfg, agieval_eval_cfg

View File

@ -0,0 +1,203 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import AGIEvalDataset_v2, AGIEvalEvaluator
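# AGIEval (v2 dataset class), zero-shot generative setup: every subset gets a
# language-matched answer hint ("答案是: " / "The answer is "), predictions are
# postprocessed with first-capital / first-capital-multi or handed to
# AGIEvalEvaluator depending on the task type, and a per-subset instruction
# is prepended to the prompt in the loop at the bottom of this file.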
agieval_reader_cfg = dict(
input_columns=['question', 'options'], output_column='label')
agieval_single_choice_sets = [
'gaokao-chinese',
'gaokao-english',
'gaokao-geography',
'gaokao-history',
'gaokao-biology',
'gaokao-chemistry',
'gaokao-physics',
'gaokao-mathqa',
'logiqa-zh',
'lsat-ar',
'lsat-lr',
'lsat-rc',
'logiqa-en',
'sat-math',
'sat-en',
'sat-en-without-passage',
'aqua-rat',
]
agieval_multiple_choices_sets = [
'jec-qa-kd',
'jec-qa-ca',
]
agieval_cloze_sets = ['gaokao-mathcloze', 'math']
agieval_chinese_sets = [
'gaokao-chinese',
'gaokao-english',
'gaokao-geography',
'gaokao-history',
'gaokao-biology',
'gaokao-chemistry',
'gaokao-physics',
'gaokao-mathqa',
'logiqa-zh',
'gaokao-mathcloze',
]
agieval_english_sets = [
'lsat-ar',
'lsat-lr',
'lsat-rc',
'logiqa-en',
'sat-math',
'sat-en',
'sat-en-without-passage',
'aqua-rat',
'math',
]
agieval_gaokao_sets = [
'gaokao-chinese',
'gaokao-english',
'gaokao-geography',
'gaokao-history',
'gaokao-biology',
'gaokao-chemistry',
'gaokao-physics',
'gaokao-mathqa',
]
agieval_datasets = []
for _name in agieval_single_choice_sets:
if _name in agieval_chinese_sets:
_hint = '答案是: '
else:
_hint = 'The answer is '
agieval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(role='HUMAN', prompt=f'{{question}}\n{{options}}\n{_hint}')
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=1024))
agieval_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='first-capital'))
agieval_datasets.append(
dict(
type=AGIEvalDataset_v2,
path='./data/AGIEval/data/v1/',
name=_name,
abbr='agieval-' + _name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for _name in agieval_multiple_choices_sets:
if _name in agieval_chinese_sets:
_hint = '答案是: '
else:
_hint = 'The answer is '
agieval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(role='HUMAN', prompt=f'{{question}}\n{{options}}\n{_hint}')
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=1024))
agieval_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='first-capital-multi'))
agieval_datasets.append(
dict(
type=AGIEvalDataset_v2,
path='./data/AGIEval/data/v1/',
name=_name,
abbr='agieval-' + _name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for _name in agieval_cloze_sets:
if _name in agieval_chinese_sets:
_hint = '答案是: '
else:
_hint = 'The answer is '
agieval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[dict(role='HUMAN', prompt=f'{{question}}\n{_hint}')])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=1024))
agieval_eval_cfg = dict(evaluator=dict(type=AGIEvalEvaluator))
agieval_datasets.append(
dict(
type=AGIEvalDataset_v2,
path='./data/AGIEval/data/v1/',
name=_name,
abbr='agieval-' + _name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for _item in agieval_datasets:
_name = _item['name']
_intro = {
'gaokao-chinese':
'以下是一道中国高考语文选择题,请选择正确的答案。',
'gaokao-english':
'以下是一道中国高考英语选择题,请选择正确的答案。',
'gaokao-geography':
'以下是一道中国高考地理选择题,请选择正确的答案。',
'gaokao-history':
'以下是一道中国高考历史选择题,请选择正确的答案。',
'gaokao-biology':
'以下是一道中国高考生物选择题,请选择正确的答案。',
'gaokao-chemistry':
'以下是一道中国高考化学选择题,请选择正确的答案。',
'gaokao-physics':
'以下是一道中国高考物理选择题,请选择正确的答案。',
'gaokao-mathqa':
'以下是一道中国高考数学选择题,请选择正确的答案。',
'logiqa-zh':
'以下是一道中国公务员考试题,请选择正确的答案。',
'lsat-ar':
'The following is a LSAT Analytical Reasoning question. Please select the correct answer.',
'lsat-lr':
'The following is a LSAT Logical Reasoning question. Please select the correct answer.',
'lsat-rc':
'The following is a LSAT Reading Comprehension question. Please select the correct answer.',
'logiqa-en':
'The following is a Logic Reasoning question. Please select the correct answer.',
'sat-math':
'The following is a SAT Math question. Please select the correct answer.',
'sat-en':
'The following is a SAT English question. Please select the correct answer.',
'sat-en-without-passage':
'The following is a SAT English question. Please select the correct answer.',
'aqua-rat':
'The following is a AQUA-RAT question. Please select the correct answer.',
'jec-qa-kd':
'以下是一道中国司法考试基础知识题,请选择正确的答案。',
'jec-qa-ca':
'以下是一道中国司法考试案例分析题,请选择正确的答案。',
'gaokao-mathcloze':
'以下是一道中国高考数学填空题,请填入正确的答案。',
'math':
'The following is a Math question. Please select the correct answer.',
}[_name]
_templates = _item['infer_cfg']['prompt_template']['template']
_templates['round'][0][
'prompt'] = _intro + '\n' + _templates['round'][0]['prompt']
del _item, _intro, _templates, _name, _hint, agieval_infer_cfg, agieval_eval_cfg

View File

@ -0,0 +1,217 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer, GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import AGIEvalDataset_v2, AGIEvalEvaluator
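# AGIEval (v2 dataset class), mixed setup: single-choice sets are scored with
# the PPL inferencer over one template per option (A-E where applicable),
# while multiple-choice and cloze sets fall back to generation; the per-subset
# instruction is again prepended in the final loop, handling both the PPL and
# the generative template shapes.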
agieval_single_choice_sets = [
'gaokao-chinese',
'gaokao-english',
'gaokao-geography',
'gaokao-history',
'gaokao-biology',
'gaokao-chemistry',
'gaokao-physics',
'gaokao-mathqa',
'logiqa-zh',
'lsat-ar',
'lsat-lr',
'lsat-rc',
'logiqa-en',
'sat-math',
'sat-en',
'sat-en-without-passage',
'aqua-rat',
]
agieval_multiple_choices_sets = [
'jec-qa-kd',
'jec-qa-ca',
]
agieval_cloze_sets = ['gaokao-mathcloze', 'math']
agieval_chinese_sets = [
'gaokao-chinese',
'gaokao-english',
'gaokao-geography',
'gaokao-history',
'gaokao-biology',
'gaokao-chemistry',
'gaokao-physics',
'gaokao-mathqa',
'logiqa-zh',
'gaokao-mathcloze',
]
agieval_english_sets = [
'lsat-ar',
'lsat-lr',
'lsat-rc',
'logiqa-en',
'sat-math',
'sat-en',
'sat-en-without-passage',
'aqua-rat',
'math',
]
agieval_gaokao_sets = [
'gaokao-chinese',
'gaokao-english',
'gaokao-geography',
'gaokao-history',
'gaokao-biology',
'gaokao-chemistry',
'gaokao-physics',
'gaokao-mathqa',
]
agieval_datasets = []
for _name in agieval_single_choice_sets:
if _name in ['lsat-ar', 'lsat-lr', 'lsat-rc', 'aqua-rat']:
_options = ['A', 'B', 'C', 'D', 'E']
else:
_options = ['A', 'B', 'C', 'D']
if _name in agieval_chinese_sets:
_hint = '答案是:'
else:
_hint = 'The answer is '
agieval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
label: dict(round=[
dict(role='HUMAN', prompt='{question}\n{options}'),
dict(role='BOT', prompt=f'{_hint}{label}')
])
for label in _options
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer, labels=_options))
agieval_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
agieval_datasets.append(
dict(
type=AGIEvalDataset_v2,
path='./data/AGIEval/data/v1/',
name=_name,
abbr='agieval-' + _name,
setting_name='zero-shot',
reader_cfg=dict(
input_columns=['question', 'options'] + _options,
output_column='label'),
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for _name in agieval_multiple_choices_sets:
if _name in agieval_chinese_sets:
_hint = '答案是: '
else:
_hint = 'The answer is '
agieval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(role='HUMAN', prompt=f'{{question}}\n{{options}}\n{_hint}')
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=1024))
agieval_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='first-capital-multi'))
agieval_datasets.append(
dict(
type=AGIEvalDataset_v2,
path='./data/AGIEval/data/v1/',
name=_name,
abbr='agieval-' + _name,
setting_name='zero-shot',
reader_cfg=dict(
input_columns=['question', 'options'], output_column='label'),
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for _name in agieval_cloze_sets:
if _name in agieval_chinese_sets:
_hint = '答案是:'
else:
_hint = 'The answer is '
agieval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[dict(role='HUMAN', prompt=f'{{question}}{_hint}')])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=1024))
agieval_eval_cfg = dict(evaluator=dict(type=AGIEvalEvaluator))
agieval_datasets.append(
dict(
type=AGIEvalDataset_v2,
path='./data/AGIEval/data/v1/',
name=_name,
abbr='agieval-' + _name,
setting_name='zero-shot',
reader_cfg=dict(
input_columns=['question', 'options'], output_column='label'),
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for _item in agieval_datasets:
_name = _item['name']
_intro = {
'gaokao-chinese':
'以下是一道中国高考语文选择题,请选择正确的答案。',
'gaokao-english':
'以下是一道中国高考英语选择题,请选择正确的答案。',
'gaokao-geography':
'以下是一道中国高考地理选择题,请选择正确的答案。',
'gaokao-history':
'以下是一道中国高考历史选择题,请选择正确的答案。',
'gaokao-biology':
'以下是一道中国高考生物选择题,请选择正确的答案。',
'gaokao-chemistry':
'以下是一道中国高考化学选择题,请选择正确的答案。',
'gaokao-physics':
'以下是一道中国高考物理选择题,请选择正确的答案。',
'gaokao-mathqa':
'以下是一道中国高考数学选择题,请选择正确的答案。',
'logiqa-zh':
'以下是一道中国公务员考试题,请选择正确的答案。',
'lsat-ar':
'The following is a LSAT Analytical Reasoning question. Please select the correct answer.',
'lsat-lr':
'The following is a LSAT Logical Reasoning question. Please select the correct answer.',
'lsat-rc':
'The following is a LSAT Reading Comprehension question. Please select the correct answer.',
'logiqa-en':
'The following is a Logic Reasoning question. Please select the correct answer.',
'sat-math':
'The following is a SAT Math question. Please select the correct answer.',
'sat-en':
'The following is a SAT English question. Please select the correct answer.',
'sat-en-without-passage':
'The following is a SAT English question. Please select the correct answer.',
'aqua-rat':
'The following is a AQUA-RAT question. Please select the correct answer.',
'jec-qa-kd':
'以下是一道中国司法考试基础知识题,请选择正确的答案。',
'jec-qa-ca':
'以下是一道中国司法考试案例分析题,请选择正确的答案。',
'gaokao-mathcloze':
'以下是一道中国高考数学填空题,请填入正确的答案。',
'math':
'The following is a Math question. Please select the correct answer.',
}[_name]
_templates = _item['infer_cfg']['prompt_template']['template']
if _item['infer_cfg']['inferencer']['type'] == PPLInferencer:
for _label in _templates:
_templates[_label]['round'][0][
'prompt'] = _intro + '\n' + _templates[_label]['round'][0][
'prompt']
else:
_templates['round'][0][
'prompt'] = _intro + '\n' + _templates['round'][0]['prompt']
del _item, _intro, _templates, _label, _name, _options, _hint, agieval_infer_cfg, agieval_eval_cfg

View File

@ -0,0 +1,40 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator
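# APPS, zero-shot generative setup: the instruction "Write a python program:"
# is issued as a SYSTEM prompt (falling back to HUMAN), and the generated
# program is scored with the humaneval-style evaluator at k = 1/10/100.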
apps_reader_cfg = dict(
input_columns=['question'], output_column='problem_id', train_split='test')
# TODO: allow empty output-column
apps_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin=[
dict(
role='SYSTEM',
fallback_role='HUMAN',
prompt='Write a python program:'),
],
round=[
dict(role='HUMAN', prompt='{question}'),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
apps_eval_cfg = dict(
evaluator=dict(type=HumanEvaluator),
pred_role='BOT',
    k=[1, 10, 100],  # this parameter is only used by the humaneval evaluator
pred_postprocessor=dict(type='humaneval'),
)
apps_datasets = [
dict(
type=HFDataset,
path='codeparrot/apps',
reader_cfg=apps_reader_cfg,
infer_cfg=apps_infer_cfg,
eval_cfg=apps_eval_cfg)
]

View File

@ -0,0 +1,33 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator
apps_reader_cfg = dict(
input_columns=['question'], output_column='problem_id', train_split='test')
# TODO: allow empty output-column
apps_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(role='HUMAN', prompt='Write a python program:\n{question}'),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
apps_eval_cfg = dict(
evaluator=dict(type=HumanEvaluator),
pred_role='BOT',
    k=[1, 10, 100],  # this parameter is only used by the humaneval evaluator
pred_postprocessor=dict(type='humaneval'),
)
apps_datasets = [
dict(
type=HFDataset,
path='codeparrot/apps',
reader_cfg=apps_reader_cfg,
infer_cfg=apps_infer_cfg,
eval_cfg=apps_eval_cfg)
]

View File

@ -0,0 +1,30 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator
apps_reader_cfg = dict(
input_columns=['question'], output_column='problem_id', train_split='test')
# TODO: allow empty output-column
apps_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template='{question}'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
apps_eval_cfg = dict(
evaluator=dict(type=HumanEvaluator),
k=[1, 10, 100],
pred_postprocessor=dict(type='humaneval'),
)
apps_datasets = [
dict(
type=HFDataset,
path='codeparrot/apps',
reader_cfg=apps_reader_cfg,
infer_cfg=apps_infer_cfg,
eval_cfg=apps_eval_cfg)
]

View File

@ -0,0 +1,100 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import BBHDataset, BBHEvaluator
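# BBH, few-shot chain-of-thought setup: the per-task example file from
# lib_prompt is embedded in the prompt, multiple-choice tasks are scored by
# accuracy after bbh-mcq postprocessing of both prediction and reference, and
# free-form tasks use BBHEvaluator.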
bbh_reader_cfg = dict(input_columns=["input"], output_column="target")
_path_prefix = "./data/BBH"
bbh_multiple_choice_sets = [
'temporal_sequences',
'disambiguation_qa',
'date_understanding',
'tracking_shuffled_objects_three_objects',
'penguins_in_a_table',
'geometric_shapes',
'snarks',
'ruin_names',
'tracking_shuffled_objects_seven_objects',
'tracking_shuffled_objects_five_objects',
'logical_deduction_three_objects',
'hyperbaton',
'logical_deduction_five_objects',
'logical_deduction_seven_objects',
'movie_recommendation',
'salient_translation_error_detection',
'reasoning_about_colored_objects',
]
bbh_free_form_sets = [
'multistep_arithmetic_two',
'navigate',
'dyck_languages',
'word_sorting',
'sports_understanding',
'boolean_expressions',
'object_counting',
'formal_fallacies',
'causal_judgement',
'web_of_lies',
]
bbh_datasets = []
for _name in bbh_multiple_choice_sets:
_hint = open(f"{_path_prefix}/lib_prompt/{_name}.txt", 'r').read()
bbh_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
f"Follow the given examples and answer the question.\n{_hint}\n\nQ: {{input}}\nA: Let's think step by step."
)
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
bbh_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type='bbh-mcq'),
dataset_postprocessor=dict(type='bbh-mcq'))
bbh_datasets.append(
dict(
type=BBHDataset,
path=f"{_path_prefix}/data",
name=_name,
abbr='bbh-' + _name,
reader_cfg=bbh_reader_cfg,
infer_cfg=bbh_infer_cfg.copy(),
eval_cfg=bbh_eval_cfg.copy()))
for _name in bbh_free_form_sets:
_hint = open(f"{_path_prefix}/lib_prompt/{_name}.txt", 'r').read()
bbh_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
f"Follow the given examples and answer the question.\n{_hint}\n\nQ: {{input}}\nA: Let's think step by step."
)
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
bbh_eval_cfg = dict(evaluator=dict(type=BBHEvaluator), pred_role="BOT")
bbh_datasets.append(
dict(
type=BBHDataset,
path=f"{_path_prefix}/data",
name=_name,
abbr='bbh-' + _name,
reader_cfg=bbh_reader_cfg,
infer_cfg=bbh_infer_cfg.copy(),
eval_cfg=bbh_eval_cfg.copy()))
del _name, _hint, _path_prefix

View File

@ -0,0 +1,187 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CEvalDataset
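# C-Eval, 5-shot generative setup over the val and test splits: five dev-set
# exemplars are injected through the ice_template / FixKRetriever mechanism
# (fix_id_list), and predictions are reduced to their first capital letter
# before accuracy scoring.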
ceval_subject_mapping = {
"computer_network":
["Computer Network", "\u8ba1\u7b97\u673a\u7f51\u7edc", "STEM"],
"operating_system":
["Operating System", "\u64cd\u4f5c\u7cfb\u7edf", "STEM"],
"computer_architecture":
["Computer Architecture", "\u8ba1\u7b97\u673a\u7ec4\u6210", "STEM"],
"college_programming":
["College Programming", "\u5927\u5b66\u7f16\u7a0b", "STEM"],
"college_physics": ["College Physics", "\u5927\u5b66\u7269\u7406", "STEM"],
"college_chemistry":
["College Chemistry", "\u5927\u5b66\u5316\u5b66", "STEM"],
"advanced_mathematics":
["Advanced Mathematics", "\u9ad8\u7b49\u6570\u5b66", "STEM"],
"probability_and_statistics":
["Probability and Statistics", "\u6982\u7387\u7edf\u8ba1", "STEM"],
"discrete_mathematics":
["Discrete Mathematics", "\u79bb\u6563\u6570\u5b66", "STEM"],
"electrical_engineer": [
"Electrical Engineer", "\u6ce8\u518c\u7535\u6c14\u5de5\u7a0b\u5e08",
"STEM"
],
"metrology_engineer":
["Metrology Engineer", "\u6ce8\u518c\u8ba1\u91cf\u5e08", "STEM"],
"high_school_mathematics":
["High School Mathematics", "\u9ad8\u4e2d\u6570\u5b66", "STEM"],
"high_school_physics":
["High School Physics", "\u9ad8\u4e2d\u7269\u7406", "STEM"],
"high_school_chemistry":
["High School Chemistry", "\u9ad8\u4e2d\u5316\u5b66", "STEM"],
"high_school_biology": [
"High School Biology", "\u9ad8\u4e2d\u751f\u7269", "STEM"
],
"middle_school_mathematics": [
"Middle School Mathematics", "\u521d\u4e2d\u6570\u5b66", "STEM"
],
"middle_school_biology": [
"Middle School Biology", "\u521d\u4e2d\u751f\u7269", "STEM"
],
"middle_school_physics": [
"Middle School Physics", "\u521d\u4e2d\u7269\u7406", "STEM"
],
"middle_school_chemistry": [
"Middle School Chemistry", "\u521d\u4e2d\u5316\u5b66", "STEM"
],
"veterinary_medicine": [
"Veterinary Medicine", "\u517d\u533b\u5b66", "STEM"
],
"college_economics": [
"College Economics", "\u5927\u5b66\u7ecf\u6d4e\u5b66", "Social Science"
],
"business_administration": [
"Business Administration", "\u5de5\u5546\u7ba1\u7406", "Social Science"
],
"marxism": [
"Marxism", "\u9a6c\u514b\u601d\u4e3b\u4e49\u57fa\u672c\u539f\u7406",
"Social Science"
],
"mao_zedong_thought": [
"Mao Zedong Thought",
"\u6bdb\u6cfd\u4e1c\u601d\u60f3\u548c\u4e2d\u56fd\u7279\u8272\u793e\u4f1a\u4e3b\u4e49\u7406\u8bba\u4f53\u7cfb\u6982\u8bba",
"Social Science"
],
"education_science": [
"Education Science", "\u6559\u80b2\u5b66", "Social Science"
],
"teacher_qualification": [
"Teacher Qualification", "\u6559\u5e08\u8d44\u683c", "Social Science"
],
"high_school_politics": [
"High School Politics", "\u9ad8\u4e2d\u653f\u6cbb", "Social Science"
],
"high_school_geography": [
"High School Geography", "\u9ad8\u4e2d\u5730\u7406", "Social Science"
],
"middle_school_politics": [
"Middle School Politics", "\u521d\u4e2d\u653f\u6cbb", "Social Science"
],
"middle_school_geography": [
"Middle School Geography", "\u521d\u4e2d\u5730\u7406", "Social Science"
],
"modern_chinese_history":
["Modern Chinese History", "\u8fd1\u4ee3\u53f2\u7eb2\u8981", "Humanities"],
"ideological_and_moral_cultivation": [
"Ideological and Moral Cultivation",
"\u601d\u60f3\u9053\u5fb7\u4fee\u517b\u4e0e\u6cd5\u5f8b\u57fa\u7840",
"Humanities"
],
"logic": ["Logic", "\u903b\u8f91\u5b66", "Humanities"],
"law": ["Law", "\u6cd5\u5b66", "Humanities"],
"chinese_language_and_literature": [
"Chinese Language and Literature",
"\u4e2d\u56fd\u8bed\u8a00\u6587\u5b66", "Humanities"
],
"art_studies": ["Art Studies", "\u827a\u672f\u5b66", "Humanities"],
"professional_tour_guide": [
"Professional Tour Guide", "\u5bfc\u6e38\u8d44\u683c", "Humanities"
],
"legal_professional": [
"Legal Professional", "\u6cd5\u5f8b\u804c\u4e1a\u8d44\u683c",
"Humanities"
],
"high_school_chinese": [
"High School Chinese", "\u9ad8\u4e2d\u8bed\u6587", "Humanities"
],
"high_school_history": [
"High School History", "\u9ad8\u4e2d\u5386\u53f2", "Humanities"
],
"middle_school_history": [
"Middle School History", "\u521d\u4e2d\u5386\u53f2", "Humanities"
],
"civil_servant": ["Civil Servant", "\u516c\u52a1\u5458", "Other"],
"sports_science": ["Sports Science", "\u4f53\u80b2\u5b66", "Other"],
"plant_protection": [
"Plant Protection", "\u690d\u7269\u4fdd\u62a4", "Other"
],
"basic_medicine": ["Basic Medicine", "\u57fa\u7840\u533b\u5b66", "Other"],
"clinical_medicine": [
"Clinical Medicine", "\u4e34\u5e8a\u533b\u5b66", "Other"
],
"urban_and_rural_planner": [
"Urban and Rural Planner",
"\u6ce8\u518c\u57ce\u4e61\u89c4\u5212\u5e08", "Other"
],
"accountant": ["Accountant", "\u6ce8\u518c\u4f1a\u8ba1\u5e08", "Other"],
"fire_engineer": [
"Fire Engineer", "\u6ce8\u518c\u6d88\u9632\u5de5\u7a0b\u5e08", "Other"
],
"environmental_impact_assessment_engineer": [
"Environmental Impact Assessment Engineer",
"\u73af\u5883\u5f71\u54cd\u8bc4\u4ef7\u5de5\u7a0b\u5e08", "Other"
],
"tax_accountant": ["Tax Accountant", "\u7a0e\u52a1\u5e08", "Other"],
"physician": ["Physician", "\u533b\u5e08\u8d44\u683c", "Other"]
}
ceval_all_sets = list(ceval_subject_mapping.keys())
ceval_datasets = []
for _split in ["val", "test"]:
for _name in ceval_all_sets:
_ch_name = ceval_subject_mapping[_name][1]
ceval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(
begin="</E>",
round=[
dict(
role="HUMAN",
prompt=
f"以下是中国关于{_ch_name}考试的单项选择题,请选出其中的正确答案。\n{{question}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n答案: "
),
dict(role="BOT", prompt="{answer}"),
]),
ice_token="</E>",
),
retriever=dict(type=FixKRetriever),
inferencer=dict(type=GenInferencer, fix_id_list=[0, 1, 2, 3, 4]),
)
ceval_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='first-capital'))
ceval_datasets.append(
dict(
type=CEvalDataset,
path="./data/ceval/formal_ceval",
name=_name,
abbr="ceval-" + _name if _split == "val" else "ceval-test-" +
_name,
reader_cfg=dict(
input_columns=["question", "A", "B", "C", "D"],
output_column="answer",
train_split="dev",
test_split=_split),
infer_cfg=ceval_infer_cfg,
eval_cfg=ceval_eval_cfg,
))
del _split, _name, _ch_name

View File

@ -0,0 +1,185 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CEvalDataset
ceval_subject_mapping = {
"computer_network":
["Computer Network", "\u8ba1\u7b97\u673a\u7f51\u7edc", "STEM"],
"operating_system":
["Operating System", "\u64cd\u4f5c\u7cfb\u7edf", "STEM"],
"computer_architecture":
["Computer Architecture", "\u8ba1\u7b97\u673a\u7ec4\u6210", "STEM"],
"college_programming":
["College Programming", "\u5927\u5b66\u7f16\u7a0b", "STEM"],
"college_physics": ["College Physics", "\u5927\u5b66\u7269\u7406", "STEM"],
"college_chemistry":
["College Chemistry", "\u5927\u5b66\u5316\u5b66", "STEM"],
"advanced_mathematics":
["Advanced Mathematics", "\u9ad8\u7b49\u6570\u5b66", "STEM"],
"probability_and_statistics":
["Probability and Statistics", "\u6982\u7387\u7edf\u8ba1", "STEM"],
"discrete_mathematics":
["Discrete Mathematics", "\u79bb\u6563\u6570\u5b66", "STEM"],
"electrical_engineer": [
"Electrical Engineer", "\u6ce8\u518c\u7535\u6c14\u5de5\u7a0b\u5e08",
"STEM"
],
"metrology_engineer":
["Metrology Engineer", "\u6ce8\u518c\u8ba1\u91cf\u5e08", "STEM"],
"high_school_mathematics":
["High School Mathematics", "\u9ad8\u4e2d\u6570\u5b66", "STEM"],
"high_school_physics":
["High School Physics", "\u9ad8\u4e2d\u7269\u7406", "STEM"],
"high_school_chemistry":
["High School Chemistry", "\u9ad8\u4e2d\u5316\u5b66", "STEM"],
"high_school_biology": [
"High School Biology", "\u9ad8\u4e2d\u751f\u7269", "STEM"
],
"middle_school_mathematics": [
"Middle School Mathematics", "\u521d\u4e2d\u6570\u5b66", "STEM"
],
"middle_school_biology": [
"Middle School Biology", "\u521d\u4e2d\u751f\u7269", "STEM"
],
"middle_school_physics": [
"Middle School Physics", "\u521d\u4e2d\u7269\u7406", "STEM"
],
"middle_school_chemistry": [
"Middle School Chemistry", "\u521d\u4e2d\u5316\u5b66", "STEM"
],
"veterinary_medicine": [
"Veterinary Medicine", "\u517d\u533b\u5b66", "STEM"
],
"college_economics": [
"College Economics", "\u5927\u5b66\u7ecf\u6d4e\u5b66", "Social Science"
],
"business_administration": [
"Business Administration", "\u5de5\u5546\u7ba1\u7406", "Social Science"
],
"marxism": [
"Marxism", "\u9a6c\u514b\u601d\u4e3b\u4e49\u57fa\u672c\u539f\u7406",
"Social Science"
],
"mao_zedong_thought": [
"Mao Zedong Thought",
"\u6bdb\u6cfd\u4e1c\u601d\u60f3\u548c\u4e2d\u56fd\u7279\u8272\u793e\u4f1a\u4e3b\u4e49\u7406\u8bba\u4f53\u7cfb\u6982\u8bba",
"Social Science"
],
"education_science": [
"Education Science", "\u6559\u80b2\u5b66", "Social Science"
],
"teacher_qualification": [
"Teacher Qualification", "\u6559\u5e08\u8d44\u683c", "Social Science"
],
"high_school_politics": [
"High School Politics", "\u9ad8\u4e2d\u653f\u6cbb", "Social Science"
],
"high_school_geography": [
"High School Geography", "\u9ad8\u4e2d\u5730\u7406", "Social Science"
],
"middle_school_politics": [
"Middle School Politics", "\u521d\u4e2d\u653f\u6cbb", "Social Science"
],
"middle_school_geography": [
"Middle School Geography", "\u521d\u4e2d\u5730\u7406", "Social Science"
],
"modern_chinese_history":
["Modern Chinese History", "\u8fd1\u4ee3\u53f2\u7eb2\u8981", "Humanities"],
"ideological_and_moral_cultivation": [
"Ideological and Moral Cultivation",
"\u601d\u60f3\u9053\u5fb7\u4fee\u517b\u4e0e\u6cd5\u5f8b\u57fa\u7840",
"Humanities"
],
"logic": ["Logic", "\u903b\u8f91\u5b66", "Humanities"],
"law": ["Law", "\u6cd5\u5b66", "Humanities"],
"chinese_language_and_literature": [
"Chinese Language and Literature",
"\u4e2d\u56fd\u8bed\u8a00\u6587\u5b66", "Humanities"
],
"art_studies": ["Art Studies", "\u827a\u672f\u5b66", "Humanities"],
"professional_tour_guide": [
"Professional Tour Guide", "\u5bfc\u6e38\u8d44\u683c", "Humanities"
],
"legal_professional": [
"Legal Professional", "\u6cd5\u5f8b\u804c\u4e1a\u8d44\u683c",
"Humanities"
],
"high_school_chinese": [
"High School Chinese", "\u9ad8\u4e2d\u8bed\u6587", "Humanities"
],
"high_school_history": [
"High School History", "\u9ad8\u4e2d\u5386\u53f2", "Humanities"
],
"middle_school_history": [
"Middle School History", "\u521d\u4e2d\u5386\u53f2", "Humanities"
],
"civil_servant": ["Civil Servant", "\u516c\u52a1\u5458", "Other"],
"sports_science": ["Sports Science", "\u4f53\u80b2\u5b66", "Other"],
"plant_protection": [
"Plant Protection", "\u690d\u7269\u4fdd\u62a4", "Other"
],
"basic_medicine": ["Basic Medicine", "\u57fa\u7840\u533b\u5b66", "Other"],
"clinical_medicine": [
"Clinical Medicine", "\u4e34\u5e8a\u533b\u5b66", "Other"
],
"urban_and_rural_planner": [
"Urban and Rural Planner",
"\u6ce8\u518c\u57ce\u4e61\u89c4\u5212\u5e08", "Other"
],
"accountant": ["Accountant", "\u6ce8\u518c\u4f1a\u8ba1\u5e08", "Other"],
"fire_engineer": [
"Fire Engineer", "\u6ce8\u518c\u6d88\u9632\u5de5\u7a0b\u5e08", "Other"
],
"environmental_impact_assessment_engineer": [
"Environmental Impact Assessment Engineer",
"\u73af\u5883\u5f71\u54cd\u8bc4\u4ef7\u5de5\u7a0b\u5e08", "Other"
],
"tax_accountant": ["Tax Accountant", "\u7a0e\u52a1\u5e08", "Other"],
"physician": ["Physician", "\u533b\u5e08\u8d44\u683c", "Other"]
}
ceval_all_sets = list(ceval_subject_mapping.keys())
ceval_datasets = []
for _split in ["val"]:
for _name in ceval_all_sets:
_ch_name = ceval_subject_mapping[_name][1]
ceval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(
begin="</E>",
round=[
dict(
role="HUMAN",
prompt=
f"以下是中国关于{_ch_name}考试的单项选择题,请选出其中的正确答案。\n{{question}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n答案: "
),
dict(role="BOT", prompt="{answer}"),
]),
ice_token="</E>",
),
retriever=dict(type=FixKRetriever),
inferencer=dict(type=GenInferencer, fix_id_list=[0, 1, 2, 3, 4]),
)
ceval_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )
ceval_datasets.append(
dict(
type=CEvalDataset,
path="./data/ceval/formal_ceval",
name=_name,
abbr="ceval-" + _name if _split == "val" else "ceval-test-" +
_name,
reader_cfg=dict(
input_columns=["question", "A", "B", "C", "D"],
output_column="answer",
train_split="dev",
test_split=_split),
infer_cfg=ceval_infer_cfg,
eval_cfg=ceval_eval_cfg,
))
del _split, _name, _ch_name

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .ceval_ppl_275812 import ceval_datasets # noqa: F401, F403

View File

@ -0,0 +1,188 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CEvalDataset
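# C-Eval, 5-shot PPL setup over the val and test splits: each option A-D is
# rendered as a separate template ending in the candidate letter, the
# lowest-perplexity option is taken as the prediction, and the five exemplars
# again come from the dev split via FixKRetriever.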
ceval_subject_mapping = {
"computer_network":
["Computer Network", "\u8ba1\u7b97\u673a\u7f51\u7edc", "STEM"],
"operating_system":
["Operating System", "\u64cd\u4f5c\u7cfb\u7edf", "STEM"],
"computer_architecture":
["Computer Architecture", "\u8ba1\u7b97\u673a\u7ec4\u6210", "STEM"],
"college_programming":
["College Programming", "\u5927\u5b66\u7f16\u7a0b", "STEM"],
"college_physics": ["College Physics", "\u5927\u5b66\u7269\u7406", "STEM"],
"college_chemistry":
["College Chemistry", "\u5927\u5b66\u5316\u5b66", "STEM"],
"advanced_mathematics":
["Advanced Mathematics", "\u9ad8\u7b49\u6570\u5b66", "STEM"],
"probability_and_statistics":
["Probability and Statistics", "\u6982\u7387\u7edf\u8ba1", "STEM"],
"discrete_mathematics":
["Discrete Mathematics", "\u79bb\u6563\u6570\u5b66", "STEM"],
"electrical_engineer": [
"Electrical Engineer", "\u6ce8\u518c\u7535\u6c14\u5de5\u7a0b\u5e08",
"STEM"
],
"metrology_engineer":
["Metrology Engineer", "\u6ce8\u518c\u8ba1\u91cf\u5e08", "STEM"],
"high_school_mathematics":
["High School Mathematics", "\u9ad8\u4e2d\u6570\u5b66", "STEM"],
"high_school_physics":
["High School Physics", "\u9ad8\u4e2d\u7269\u7406", "STEM"],
"high_school_chemistry":
["High School Chemistry", "\u9ad8\u4e2d\u5316\u5b66", "STEM"],
"high_school_biology": [
"High School Biology", "\u9ad8\u4e2d\u751f\u7269", "STEM"
],
"middle_school_mathematics": [
"Middle School Mathematics", "\u521d\u4e2d\u6570\u5b66", "STEM"
],
"middle_school_biology": [
"Middle School Biology", "\u521d\u4e2d\u751f\u7269", "STEM"
],
"middle_school_physics": [
"Middle School Physics", "\u521d\u4e2d\u7269\u7406", "STEM"
],
"middle_school_chemistry": [
"Middle School Chemistry", "\u521d\u4e2d\u5316\u5b66", "STEM"
],
"veterinary_medicine": [
"Veterinary Medicine", "\u517d\u533b\u5b66", "STEM"
],
"college_economics": [
"College Economics", "\u5927\u5b66\u7ecf\u6d4e\u5b66", "Social Science"
],
"business_administration": [
"Business Administration", "\u5de5\u5546\u7ba1\u7406", "Social Science"
],
"marxism": [
"Marxism", "\u9a6c\u514b\u601d\u4e3b\u4e49\u57fa\u672c\u539f\u7406",
"Social Science"
],
"mao_zedong_thought": [
"Mao Zedong Thought",
"\u6bdb\u6cfd\u4e1c\u601d\u60f3\u548c\u4e2d\u56fd\u7279\u8272\u793e\u4f1a\u4e3b\u4e49\u7406\u8bba\u4f53\u7cfb\u6982\u8bba",
"Social Science"
],
"education_science": [
"Education Science", "\u6559\u80b2\u5b66", "Social Science"
],
"teacher_qualification": [
"Teacher Qualification", "\u6559\u5e08\u8d44\u683c", "Social Science"
],
"high_school_politics": [
"High School Politics", "\u9ad8\u4e2d\u653f\u6cbb", "Social Science"
],
"high_school_geography": [
"High School Geography", "\u9ad8\u4e2d\u5730\u7406", "Social Science"
],
"middle_school_politics": [
"Middle School Politics", "\u521d\u4e2d\u653f\u6cbb", "Social Science"
],
"middle_school_geography": [
"Middle School Geography", "\u521d\u4e2d\u5730\u7406", "Social Science"
],
"modern_chinese_history":
["Modern Chinese History", "\u8fd1\u4ee3\u53f2\u7eb2\u8981", "Humanities"],
"ideological_and_moral_cultivation": [
"Ideological and Moral Cultivation",
"\u601d\u60f3\u9053\u5fb7\u4fee\u517b\u4e0e\u6cd5\u5f8b\u57fa\u7840",
"Humanities"
],
"logic": ["Logic", "\u903b\u8f91\u5b66", "Humanities"],
"law": ["Law", "\u6cd5\u5b66", "Humanities"],
"chinese_language_and_literature": [
"Chinese Language and Literature",
"\u4e2d\u56fd\u8bed\u8a00\u6587\u5b66", "Humanities"
],
"art_studies": ["Art Studies", "\u827a\u672f\u5b66", "Humanities"],
"professional_tour_guide": [
"Professional Tour Guide", "\u5bfc\u6e38\u8d44\u683c", "Humanities"
],
"legal_professional": [
"Legal Professional", "\u6cd5\u5f8b\u804c\u4e1a\u8d44\u683c",
"Humanities"
],
"high_school_chinese": [
"High School Chinese", "\u9ad8\u4e2d\u8bed\u6587", "Humanities"
],
"high_school_history": [
"High School History", "\u9ad8\u4e2d\u5386\u53f2", "Humanities"
],
"middle_school_history": [
"Middle School History", "\u521d\u4e2d\u5386\u53f2", "Humanities"
],
"civil_servant": ["Civil Servant", "\u516c\u52a1\u5458", "Other"],
"sports_science": ["Sports Science", "\u4f53\u80b2\u5b66", "Other"],
"plant_protection": [
"Plant Protection", "\u690d\u7269\u4fdd\u62a4", "Other"
],
"basic_medicine": ["Basic Medicine", "\u57fa\u7840\u533b\u5b66", "Other"],
"clinical_medicine": [
"Clinical Medicine", "\u4e34\u5e8a\u533b\u5b66", "Other"
],
"urban_and_rural_planner": [
"Urban and Rural Planner",
"\u6ce8\u518c\u57ce\u4e61\u89c4\u5212\u5e08", "Other"
],
"accountant": ["Accountant", "\u6ce8\u518c\u4f1a\u8ba1\u5e08", "Other"],
"fire_engineer": [
"Fire Engineer", "\u6ce8\u518c\u6d88\u9632\u5de5\u7a0b\u5e08", "Other"
],
"environmental_impact_assessment_engineer": [
"Environmental Impact Assessment Engineer",
"\u73af\u5883\u5f71\u54cd\u8bc4\u4ef7\u5de5\u7a0b\u5e08", "Other"
],
"tax_accountant": ["Tax Accountant", "\u7a0e\u52a1\u5e08", "Other"],
"physician": ["Physician", "\u533b\u5e08\u8d44\u683c", "Other"]
}
ceval_all_sets = list(ceval_subject_mapping.keys())
ceval_datasets = []
for _split in ["val", "test"]:
for _name in ceval_all_sets:
_ch_name = ceval_subject_mapping[_name][1]
ceval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template={
answer: dict(
begin="</E>",
round=[
dict(
role="HUMAN",
prompt=
f"以下是中国关于{_ch_name}考试的单项选择题,请选出其中的正确答案。\n{{question}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n答案: "
),
dict(role="BOT", prompt=answer),
])
for answer in ["A", "B", "C", "D"]
},
ice_token="</E>",
),
retriever=dict(type=FixKRetriever),
inferencer=dict(type=PPLInferencer, fix_id_list=[0, 1, 2, 3, 4]),
)
ceval_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
ceval_datasets.append(
dict(
type=CEvalDataset,
path="./data/ceval/formal_ceval",
name=_name,
abbr="ceval-" + _name if _split == "val" else "ceval-test-" +
_name,
reader_cfg=dict(
input_columns=["question", "A", "B", "C", "D"],
output_column="answer",
train_split="dev",
test_split=_split),
infer_cfg=ceval_infer_cfg,
eval_cfg=ceval_eval_cfg,
))
del _split, _name, _ch_name

View File

@ -0,0 +1,188 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CEvalDataset
ceval_subject_mapping = {
"computer_network":
["Computer Network", "\u8ba1\u7b97\u673a\u7f51\u7edc", "STEM"],
"operating_system":
["Operating System", "\u64cd\u4f5c\u7cfb\u7edf", "STEM"],
"computer_architecture":
["Computer Architecture", "\u8ba1\u7b97\u673a\u7ec4\u6210", "STEM"],
"college_programming":
["College Programming", "\u5927\u5b66\u7f16\u7a0b", "STEM"],
"college_physics": ["College Physics", "\u5927\u5b66\u7269\u7406", "STEM"],
"college_chemistry":
["College Chemistry", "\u5927\u5b66\u5316\u5b66", "STEM"],
"advanced_mathematics":
["Advanced Mathematics", "\u9ad8\u7b49\u6570\u5b66", "STEM"],
"probability_and_statistics":
["Probability and Statistics", "\u6982\u7387\u7edf\u8ba1", "STEM"],
"discrete_mathematics":
["Discrete Mathematics", "\u79bb\u6563\u6570\u5b66", "STEM"],
"electrical_engineer": [
"Electrical Engineer", "\u6ce8\u518c\u7535\u6c14\u5de5\u7a0b\u5e08",
"STEM"
],
"metrology_engineer":
["Metrology Engineer", "\u6ce8\u518c\u8ba1\u91cf\u5e08", "STEM"],
"high_school_mathematics":
["High School Mathematics", "\u9ad8\u4e2d\u6570\u5b66", "STEM"],
"high_school_physics":
["High School Physics", "\u9ad8\u4e2d\u7269\u7406", "STEM"],
"high_school_chemistry":
["High School Chemistry", "\u9ad8\u4e2d\u5316\u5b66", "STEM"],
"high_school_biology": [
"High School Biology", "\u9ad8\u4e2d\u751f\u7269", "STEM"
],
"middle_school_mathematics": [
"Middle School Mathematics", "\u521d\u4e2d\u6570\u5b66", "STEM"
],
"middle_school_biology": [
"Middle School Biology", "\u521d\u4e2d\u751f\u7269", "STEM"
],
"middle_school_physics": [
"Middle School Physics", "\u521d\u4e2d\u7269\u7406", "STEM"
],
"middle_school_chemistry": [
"Middle School Chemistry", "\u521d\u4e2d\u5316\u5b66", "STEM"
],
"veterinary_medicine": [
"Veterinary Medicine", "\u517d\u533b\u5b66", "STEM"
],
"college_economics": [
"College Economics", "\u5927\u5b66\u7ecf\u6d4e\u5b66", "Social Science"
],
"business_administration": [
"Business Administration", "\u5de5\u5546\u7ba1\u7406", "Social Science"
],
"marxism": [
"Marxism", "\u9a6c\u514b\u601d\u4e3b\u4e49\u57fa\u672c\u539f\u7406",
"Social Science"
],
"mao_zedong_thought": [
"Mao Zedong Thought",
"\u6bdb\u6cfd\u4e1c\u601d\u60f3\u548c\u4e2d\u56fd\u7279\u8272\u793e\u4f1a\u4e3b\u4e49\u7406\u8bba\u4f53\u7cfb\u6982\u8bba",
"Social Science"
],
"education_science": [
"Education Science", "\u6559\u80b2\u5b66", "Social Science"
],
"teacher_qualification": [
"Teacher Qualification", "\u6559\u5e08\u8d44\u683c", "Social Science"
],
"high_school_politics": [
"High School Politics", "\u9ad8\u4e2d\u653f\u6cbb", "Social Science"
],
"high_school_geography": [
"High School Geography", "\u9ad8\u4e2d\u5730\u7406", "Social Science"
],
"middle_school_politics": [
"Middle School Politics", "\u521d\u4e2d\u653f\u6cbb", "Social Science"
],
"middle_school_geography": [
"Middle School Geography", "\u521d\u4e2d\u5730\u7406", "Social Science"
],
"modern_chinese_history":
["Modern Chinese History", "\u8fd1\u4ee3\u53f2\u7eb2\u8981", "Humanities"],
"ideological_and_moral_cultivation": [
"Ideological and Moral Cultivation",
"\u601d\u60f3\u9053\u5fb7\u4fee\u517b\u4e0e\u6cd5\u5f8b\u57fa\u7840",
"Humanities"
],
"logic": ["Logic", "\u903b\u8f91\u5b66", "Humanities"],
"law": ["Law", "\u6cd5\u5b66", "Humanities"],
"chinese_language_and_literature": [
"Chinese Language and Literature",
"\u4e2d\u56fd\u8bed\u8a00\u6587\u5b66", "Humanities"
],
"art_studies": ["Art Studies", "\u827a\u672f\u5b66", "Humanities"],
"professional_tour_guide": [
"Professional Tour Guide", "\u5bfc\u6e38\u8d44\u683c", "Humanities"
],
"legal_professional": [
"Legal Professional", "\u6cd5\u5f8b\u804c\u4e1a\u8d44\u683c",
"Humanities"
],
"high_school_chinese": [
"High School Chinese", "\u9ad8\u4e2d\u8bed\u6587", "Humanities"
],
"high_school_history": [
"High School History", "\u9ad8\u4e2d\u5386\u53f2", "Humanities"
],
"middle_school_history": [
"Middle School History", "\u521d\u4e2d\u5386\u53f2", "Humanities"
],
"civil_servant": ["Civil Servant", "\u516c\u52a1\u5458", "Other"],
"sports_science": ["Sports Science", "\u4f53\u80b2\u5b66", "Other"],
"plant_protection": [
"Plant Protection", "\u690d\u7269\u4fdd\u62a4", "Other"
],
"basic_medicine": ["Basic Medicine", "\u57fa\u7840\u533b\u5b66", "Other"],
"clinical_medicine": [
"Clinical Medicine", "\u4e34\u5e8a\u533b\u5b66", "Other"
],
"urban_and_rural_planner": [
"Urban and Rural Planner",
"\u6ce8\u518c\u57ce\u4e61\u89c4\u5212\u5e08", "Other"
],
"accountant": ["Accountant", "\u6ce8\u518c\u4f1a\u8ba1\u5e08", "Other"],
"fire_engineer": [
"Fire Engineer", "\u6ce8\u518c\u6d88\u9632\u5de5\u7a0b\u5e08", "Other"
],
"environmental_impact_assessment_engineer": [
"Environmental Impact Assessment Engineer",
"\u73af\u5883\u5f71\u54cd\u8bc4\u4ef7\u5de5\u7a0b\u5e08", "Other"
],
"tax_accountant": ["Tax Accountant", "\u7a0e\u52a1\u5e08", "Other"],
"physician": ["Physician", "\u533b\u5e08\u8d44\u683c", "Other"]
}
ceval_all_sets = list(ceval_subject_mapping.keys())
ceval_datasets = []
for _split in ["val"]:
for _name in ceval_all_sets:
_ch_name = ceval_subject_mapping[_name][1]
ceval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template={
answer: dict(
begin="</E>",
round=[
dict(
role="HUMAN",
prompt=
f"以下是中国关于{_ch_name}考试的单项选择题,请选出其中的正确答案。\n{{question}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n答案: "
),
dict(role="BOT", prompt=answer),
])
for answer in ["A", "B", "C", "D"]
},
ice_token="</E>",
),
retriever=dict(type=FixKRetriever),
inferencer=dict(type=PPLInferencer, fix_id_list=[0, 1, 2, 3, 4]),
)
ceval_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
ceval_datasets.append(
dict(
type=CEvalDataset,
path="./data/ceval/formal_ceval",
name=_name,
abbr="ceval-" + _name if _split == "val" else "ceval-test-" +
_name,
reader_cfg=dict(
input_columns=["question", "A", "B", "C", "D"],
output_column="answer",
train_split="dev",
test_split=_split),
infer_cfg=ceval_infer_cfg,
eval_cfg=ceval_eval_cfg,
))
del _split, _name, _ch_name

View File

@ -0,0 +1,50 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import MDLRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import commonsenseqaDataset
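# CommonsenseQA, PPL setup with retrieved in-context examples: the MDL
# retriever selects 8 exemplars out of 30 candidates per test question, and
# each of the five options A-E is scored as a separate template.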
commonsenseqa_reader_cfg = dict(
input_columns=['question', 'A', 'B', 'C', 'D', 'E'],
output_column='answerKey',
test_split='validation')
_ice_template = dict(
type=PromptTemplate,
template={
ans: dict(
begin='</E>',
round=[
dict(role="HUMAN", prompt="Question: {question}\nAnswer: "),
dict(role="BOT", prompt=ans_token),
])
for ans, ans_token in [["A", "{A}"], ["B", "{B}"],
["C", "{C}"], ["D", "{D}"],
["E", "{E}"]]
},
ice_token='</E>')
commonsenseqa_infer_cfg = dict(
ice_template=_ice_template,
retriever=dict(
type=MDLRetriever,
ice_num=8,
candidate_num=30,
select_time=10,
seed=1,
batch_size=12,
ice_template=_ice_template),
inferencer=dict(type=PPLInferencer))
commonsenseqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
commonsenseqa_datasets = [
dict(
type=commonsenseqaDataset,
path='commonsense_qa',
reader_cfg=commonsenseqa_reader_cfg,
infer_cfg=commonsenseqa_infer_cfg,
eval_cfg=commonsenseqa_eval_cfg)
]
del _ice_template

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .crowspairs_ppl_f60797 import crowspairs_datasets # noqa: F401, F403

View File

@ -0,0 +1,42 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator
from opencompass.datasets import dropDataset
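# DROP, 2-shot generative setup: two worked examples are hard-coded in the
# prompt, and exact match is computed after the gsm8k postprocessor pulls out
# the final number.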
drop_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=
'''Text: In the county, the population was spread out with 23.50% under the age of 18, 8.70% from 18 to 24, 29.70% from 25 to 44, 24.70% from 45 to 64, and 13.30% who were 65 years of age or older.
Question: How many more percent are under the age of 18 compared to the 18 to 24 group?
Answer: According to the text, 23.5% are under the age of 18, and 8.7% are from ages 18 to 24. 23.5%-8.7%=14.8%. So the answer is 14.8.
Text: Playing in their second straight Thanksgiving game, the Eagles struggled especially on defense, where they were unable to stop the much-hyped Lions offense. The worst of it all was how unproven rookie Eric Rowe was tasked with covering wide receiver Calvin Johnson, leading to Johnson catching 3 touchdowns. Staffords five passing touchdowns, including three of them to Johnson was too much for the Eagles to overcome and for the second consecutive time this season, the Eagles gave up 45 points in a game. With the loss, the Eagles drop to 4-7 on the season and 6-1 when playing on Thanksgiving.
Question: How many TD passes did Stafford throw other than to Johnson?
Answer: According to the text, Stafford threw 5 TD passes, 3 of which were to Johnson. 5-3=2. So the answer is 2.
Text: {prompt}
Question: {question}
Answer:'''),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
drop_eval_cfg = dict(
evaluator=dict(type=EMEvaluator), pred_postprocessor=dict(
        type='gsm8k'))  # reuse the gsm8k postprocessor to extract the final number
drop_datasets = [
dict(
abbr='drop',
type=dropDataset,
path='drop',
reader_cfg=dict(
input_columns=['prompt'],
output_column='answers',
train_split='validation',
test_split='validation',
),
infer_cfg=drop_infer_cfg,
eval_cfg=drop_eval_cfg)
]

View File

@ -0,0 +1,154 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import TopkRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import BleuEvaluator
from opencompass.datasets import FloresFirst100Dataset
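# FLORES (first 100 samples), translation setup: the table below maps
# [ISO code, FLORES code, English name, language family]; generations use
# in-context examples selected by TopkRetriever and are scored with BLEU.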
_flores_lang_map = [
["eng", "eng_Latn", "English", "Indo-European-Germanic"],
["afr", "afr_Latn", "Afrikaans", "Indo-European-Germanic"],
["dan", "dan_Latn", "Danish", "Indo-European-Germanic"],
["deu", "deu_Latn", "German", "Indo-European-Germanic"],
["isl", "isl_Latn", "Icelandic", "Indo-European-Germanic"],
["ltz", "ltz_Latn", "Luxembourgish", "Indo-European-Germanic"],
["nld", "nld_Latn", "Dutch", "Indo-European-Germanic"],
["nob", "nob_Latn", "Norwegian", "Indo-European-Germanic"],
["swe", "swe_Latn", "Swedish", "Indo-European-Germanic"],
["ast", "ast_Latn", "Asturian", "Indo-European-Romance"],
["cat", "cat_Latn", "Catalan", "Indo-European-Romance"],
["fra", "fra_Latn", "French", "Indo-European-Romance"],
["glg", "glg_Latn", "Galician", "Indo-European-Romance"],
["oci", "oci_Latn", "Occitan", "Indo-European-Romance"],
["por", "por_Latn", "Portuguese", "Indo-European-Romance"],
["ron", "ron_Latn", "Romanian", "Indo-European-Romance"],
["spa", "spa_Latn", "Spanish", "Indo-European-Romance"],
["bel", "bel_Cyrl", "Belarusian", "Indo-European-Slavic"],
["bos", "bos_Latn", "Bosnian", "Indo-European-Slavic"],
["bul", "bul_Cyrl", "Bulgarian", "Indo-European-Slavic"],
["ces", "ces_Latn", "Czech", "Indo-European-Slavic"],
["hrv", "hrv_Latn", "Croatian", "Indo-European-Slavic"],
["mkd", "mkd_Cyrl", "Macedonian", "Indo-European-Slavic"],
["pol", "pol_Latn", "Polish", "Indo-European-Slavic"],
["rus", "rus_Cyrl", "Russian", "Indo-European-Slavic"],
["slk", "slk_Latn", "Slovak", "Indo-European-Slavic"],
["slv", "slv_Latn", "Slovenian", "Indo-European-Slavic"],
["srp", "srp_Cyrl", "Serbian", "Indo-European-Slavic"],
["ukr", "ukr_Cyrl", "Ukrainian", "Indo-European-Slavic"],
["asm", "asm_Beng", "Assamese", "Indo-European-Indo-Aryan"],
["ben", "ben_Beng", "Bengali", "Indo-European-Indo-Aryan"],
["guj", "guj_Gujr", "Gujarati", "Indo-European-Indo-Aryan"],
["hin", "hin_Deva", "Hindi", "Indo-European-Indo-Aryan"],
["mar", "mar_Deva", "Marathi", "Indo-European-Indo-Aryan"],
["npi", "npi_Deva", "Nepali", "Indo-European-Indo-Aryan"],
["ory", "ory_Orya", "Oriya", "Indo-European-Indo-Aryan"],
["pan", "pan_Guru", "Punjabi", "Indo-European-Indo-Aryan"],
["snd", "snd_Arab", "Sindhi", "Indo-European-Indo-Aryan"],
["urd", "urd_Arab", "Urdu", "Indo-European-Indo-Aryan"],
["ckb", "ckb_Arab", "Kurdish", "Indo-European-Other"],
["cym", "cym_Latn", "Welsh", "Indo-European-Other"],
["ell", "ell_Grek", "Greek", "Indo-European-Other"],
["fas", "pes_Arab", "Persian", "Indo-European-Other"],
["gle", "gle_Latn", "Irish", "Indo-European-Other"],
["hye", "hye_Armn", "Armenian", "Indo-European-Other"],
["ita", "ita_Latn", "Italian", "Indo-European-Other"],
["lav", "lvs_Latn", "Latvian", "Indo-European-Other"],
["lit", "lit_Latn", "Lithuanian", "Indo-European-Other"],
["pus", "pbt_Arab", "Pashto", "Indo-European-Other"],
["tgk", "tgk_Cyrl", "Tajik", "Indo-European-Other"],
["ceb", "ceb_Latn", "Cebuano", "Austronesian"],
["ind", "ind_Latn", "Indonesian", "Austronesian"],
["jav", "jav_Latn", "Javanese", "Austronesian"],
["mri", "mri_Latn", "Maori", "Austronesian"],
["msa", "zsm_Latn", "Malay", "Austronesian"],
["tgl", "tgl_Latn", "Tagalog", "Austronesian"],
["ibo", "ibo_Latn", "Igbo", "Atlantic-Congo"],
["kam", "kam_Latn", "Kamba", "Atlantic-Congo"],
["kea", "kea_Latn", "Kabuverdianu", "Atlantic-Congo"],
["lin", "lin_Latn", "Lingala", "Atlantic-Congo"],
["lug", "lug_Latn", "Luganda", "Atlantic-Congo"],
["nso", "nso_Latn", "Northern Sotho", "Atlantic-Congo"],
["nya", "nya_Latn", "Nyanja", "Atlantic-Congo"],
["sna", "sna_Latn", "Shona", "Atlantic-Congo"],
["swh", "swh_Latn", "Swahili", "Atlantic-Congo"],
["umb", "umb_Latn", "Umbundu", "Atlantic-Congo"],
["wol", "wol_Latn", "Wolof", "Atlantic-Congo"],
["xho", "xho_Latn", "Xhosa", "Atlantic-Congo"],
["yor", "yor_Latn", "Yoruba", "Atlantic-Congo"],
["zul", "zul_Latn", "Zulu", "Atlantic-Congo"],
["amh", "amh_Ethi", "Amharic", "Afro-Asiatic"],
["ara", "arb_Arab", "Arabic", "Afro-Asiatic"],
["ful", "fuv_Latn", "Fulah", "Afro-Asiatic"],
["mlt", "mlt_Latn", "Maltese", "Afro-Asiatic"],
["orm", "gaz_Latn", "Oromo", "Afro-Asiatic"],
["som", "som_Latn", "Somali", "Afro-Asiatic"],
["azj", "azj_Latn", "Azerbaijani", "Turkic"],
["kaz", "kaz_Cyrl", "Kazakh", "Turkic"],
["kir", "kir_Cyrl", "Kyrgyz", "Turkic"],
["tur", "tur_Latn", "Turkish", "Turkic"],
["uzb", "uzn_Latn", "Uzbek", "Turkic"],
["kan", "kan_Knda", "Kannada", "Dravidian"],
["mal", "mal_Mlym", "Malayalam", "Dravidian"],
["tam", "tam_Taml", "Tamil", "Dravidian"],
["tel", "tel_Telu", "Telugu", "Dravidian"],
["mya", "mya_Mymr", "Burmese", "Sino-Tibetan"],
["zho_simpl", "zho_Hans", "Chinese (Simpl)", "Sino-Tibetan"],
["zho_trad", "zho_Hant", "Chinese (Trad)", "Sino-Tibetan"],
["est", "est_Latn", "Estonian", "Other"],
["fin", "fin_Latn", "Finnish", "Other"],
["hau", "hau_Latn", "Hausa", "Other"],
["heb", "heb_Hebr", "Hebrew", "Other"],
["hun", "hun_Latn", "Hungarian", "Other"],
["jpn", "jpn_Jpan", "Japanese", "Other"],
["kat", "kat_Geor", "Georgian", "Other"],
["khm", "khm_Khmr", "Khmer", "Other"],
["kor", "kor_Hang", "Korean", "Other"],
["lao", "lao_Laoo", "Lao", "Other"],
["luo", "luo_Latn", "Luo", "Other"],
["mon", "khk_Cyrl", "Mongolian", "Other"],
["tha", "tha_Thai", "Thai", "Other"],
["vie", "vie_Latn", "Vietnamese", "Other"],
]
flores_lang_map = {i[0]: i for i in _flores_lang_map}
_flores_subtasks = [f"eng-{i}" for i in flores_lang_map if i != "eng"
] + [f"{i}-eng" for i in flores_lang_map if i != "eng"]
flores_datasets = []
for _flores_subtask in _flores_subtasks:
_src, _tgt = _flores_subtask.split("-")
_, _flores_source, _src_inst, _ = flores_lang_map[_src]
_, _flores_target, _tgt_inst, _ = flores_lang_map[_tgt]
flores_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=f"</E>{{sentence_{_flores_source}}} = {{sentence_{_flores_target}}}" if _flores_subtask != "zho_simpl-eng"
else f"</E>Chinese: {{sentence_{_flores_source}}}\nEnglish: {{sentence_{_flores_target}}}",
ice_token="</E>",
),
retriever=dict(type=TopkRetriever, ice_num=8),
inferencer=dict(type=GenInferencer),
)
flores_eval_cfg = dict(
evaluator=dict(type=BleuEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="flores"),
dataset_postprocessor=dict(type="flores"),
)
if _tgt == "zho_simpl":
flores_eval_cfg["pred_postprocessor"] = dict(type="flores-chinese")
flores_eval_cfg["dataset_postprocessor"] = dict(type="flores-chinese")
flores_datasets.append(
dict(
type=FloresFirst100Dataset,
abbr=f"flores_100_{_src}-{_tgt}",
name=f"{_flores_source}-{_flores_target}",
reader_cfg=dict(
input_columns=f"sentence_{_flores_source}",
output_column=f"sentence_{_flores_target}",
train_split="dev",
test_split="devtest"),
infer_cfg=flores_infer_cfg.copy(),
eval_cfg=flores_eval_cfg.copy(),
))
del _flores_lang_map, _flores_subtask, _src, _tgt, _, _flores_source, _src_inst, _flores_target, _tgt_inst

View File

@ -0,0 +1,163 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import TopkRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import BleuEvaluator
from opencompass.datasets import FloresFirst100Dataset
_flores_lang_map = [
["eng", "eng_Latn", "English", "Indo-European-Germanic"],
["afr", "afr_Latn", "Afrikaans", "Indo-European-Germanic"],
["dan", "dan_Latn", "Danish", "Indo-European-Germanic"],
["deu", "deu_Latn", "German", "Indo-European-Germanic"],
["isl", "isl_Latn", "Icelandic", "Indo-European-Germanic"],
["ltz", "ltz_Latn", "Luxembourgish", "Indo-European-Germanic"],
["nld", "nld_Latn", "Dutch", "Indo-European-Germanic"],
["nob", "nob_Latn", "Norwegian", "Indo-European-Germanic"],
["swe", "swe_Latn", "Swedish", "Indo-European-Germanic"],
["ast", "ast_Latn", "Asturian", "Indo-European-Romance"],
["cat", "cat_Latn", "Catalan", "Indo-European-Romance"],
["fra", "fra_Latn", "French", "Indo-European-Romance"],
["glg", "glg_Latn", "Galician", "Indo-European-Romance"],
["oci", "oci_Latn", "Occitan", "Indo-European-Romance"],
["por", "por_Latn", "Portuguese", "Indo-European-Romance"],
["ron", "ron_Latn", "Romanian", "Indo-European-Romance"],
["spa", "spa_Latn", "Spanish", "Indo-European-Romance"],
["bel", "bel_Cyrl", "Belarusian", "Indo-European-Slavic"],
["bos", "bos_Latn", "Bosnian", "Indo-European-Slavic"],
["bul", "bul_Cyrl", "Bulgarian", "Indo-European-Slavic"],
["ces", "ces_Latn", "Czech", "Indo-European-Slavic"],
["hrv", "hrv_Latn", "Croatian", "Indo-European-Slavic"],
["mkd", "mkd_Cyrl", "Macedonian", "Indo-European-Slavic"],
["pol", "pol_Latn", "Polish", "Indo-European-Slavic"],
["rus", "rus_Cyrl", "Russian", "Indo-European-Slavic"],
["slk", "slk_Latn", "Slovak", "Indo-European-Slavic"],
["slv", "slv_Latn", "Slovenian", "Indo-European-Slavic"],
["srp", "srp_Cyrl", "Serbian", "Indo-European-Slavic"],
["ukr", "ukr_Cyrl", "Ukrainian", "Indo-European-Slavic"],
["asm", "asm_Beng", "Assamese", "Indo-European-Indo-Aryan"],
["ben", "ben_Beng", "Bengali", "Indo-European-Indo-Aryan"],
["guj", "guj_Gujr", "Gujarati", "Indo-European-Indo-Aryan"],
["hin", "hin_Deva", "Hindi", "Indo-European-Indo-Aryan"],
["mar", "mar_Deva", "Marathi", "Indo-European-Indo-Aryan"],
["npi", "npi_Deva", "Nepali", "Indo-European-Indo-Aryan"],
["ory", "ory_Orya", "Oriya", "Indo-European-Indo-Aryan"],
["pan", "pan_Guru", "Punjabi", "Indo-European-Indo-Aryan"],
["snd", "snd_Arab", "Sindhi", "Indo-European-Indo-Aryan"],
["urd", "urd_Arab", "Urdu", "Indo-European-Indo-Aryan"],
["ckb", "ckb_Arab", "Kurdish", "Indo-European-Other"],
["cym", "cym_Latn", "Welsh", "Indo-European-Other"],
["ell", "ell_Grek", "Greek", "Indo-European-Other"],
["fas", "pes_Arab", "Persian", "Indo-European-Other"],
["gle", "gle_Latn", "Irish", "Indo-European-Other"],
["hye", "hye_Armn", "Armenian", "Indo-European-Other"],
["ita", "ita_Latn", "Italian", "Indo-European-Other"],
["lav", "lvs_Latn", "Latvian", "Indo-European-Other"],
["lit", "lit_Latn", "Lithuanian", "Indo-European-Other"],
["pus", "pbt_Arab", "Pashto", "Indo-European-Other"],
["tgk", "tgk_Cyrl", "Tajik", "Indo-European-Other"],
["ceb", "ceb_Latn", "Cebuano", "Austronesian"],
["ind", "ind_Latn", "Indonesian", "Austronesian"],
["jav", "jav_Latn", "Javanese", "Austronesian"],
["mri", "mri_Latn", "Maori", "Austronesian"],
["msa", "zsm_Latn", "Malay", "Austronesian"],
["tgl", "tgl_Latn", "Tagalog", "Austronesian"],
["ibo", "ibo_Latn", "Igbo", "Atlantic-Congo"],
["kam", "kam_Latn", "Kamba", "Atlantic-Congo"],
["kea", "kea_Latn", "Kabuverdianu", "Atlantic-Congo"],
["lin", "lin_Latn", "Lingala", "Atlantic-Congo"],
["lug", "lug_Latn", "Luganda", "Atlantic-Congo"],
["nso", "nso_Latn", "Northern Sotho", "Atlantic-Congo"],
["nya", "nya_Latn", "Nyanja", "Atlantic-Congo"],
["sna", "sna_Latn", "Shona", "Atlantic-Congo"],
["swh", "swh_Latn", "Swahili", "Atlantic-Congo"],
["umb", "umb_Latn", "Umbundu", "Atlantic-Congo"],
["wol", "wol_Latn", "Wolof", "Atlantic-Congo"],
["xho", "xho_Latn", "Xhosa", "Atlantic-Congo"],
["yor", "yor_Latn", "Yoruba", "Atlantic-Congo"],
["zul", "zul_Latn", "Zulu", "Atlantic-Congo"],
["amh", "amh_Ethi", "Amharic", "Afro-Asiatic"],
["ara", "arb_Arab", "Arabic", "Afro-Asiatic"],
["ful", "fuv_Latn", "Fulah", "Afro-Asiatic"],
["mlt", "mlt_Latn", "Maltese", "Afro-Asiatic"],
["orm", "gaz_Latn", "Oromo", "Afro-Asiatic"],
["som", "som_Latn", "Somali", "Afro-Asiatic"],
["azj", "azj_Latn", "Azerbaijani", "Turkic"],
["kaz", "kaz_Cyrl", "Kazakh", "Turkic"],
["kir", "kir_Cyrl", "Kyrgyz", "Turkic"],
["tur", "tur_Latn", "Turkish", "Turkic"],
["uzb", "uzn_Latn", "Uzbek", "Turkic"],
["kan", "kan_Knda", "Kannada", "Dravidian"],
["mal", "mal_Mlym", "Malayalam", "Dravidian"],
["tam", "tam_Taml", "Tamil", "Dravidian"],
["tel", "tel_Telu", "Telugu", "Dravidian"],
["mya", "mya_Mymr", "Burmese", "Sino-Tibetan"],
["zho_simpl", "zho_Hans", "Chinese (Simpl)", "Sino-Tibetan"],
["zho_trad", "zho_Hant", "Chinese (Trad)", "Sino-Tibetan"],
["est", "est_Latn", "Estonian", "Other"],
["fin", "fin_Latn", "Finnish", "Other"],
["hau", "hau_Latn", "Hausa", "Other"],
["heb", "heb_Hebr", "Hebrew", "Other"],
["hun", "hun_Latn", "Hungarian", "Other"],
["jpn", "jpn_Jpan", "Japanese", "Other"],
["kat", "kat_Geor", "Georgian", "Other"],
["khm", "khm_Khmr", "Khmer", "Other"],
["kor", "kor_Hang", "Korean", "Other"],
["lao", "lao_Laoo", "Lao", "Other"],
["luo", "luo_Latn", "Luo", "Other"],
["mon", "khk_Cyrl", "Mongolian", "Other"],
["tha", "tha_Thai", "Thai", "Other"],
["vie", "vie_Latn", "Vietnamese", "Other"],
]
flores_lang_map = {i[0]: i for i in _flores_lang_map}
_flores_subtasks = [f"eng-{i}" for i in flores_lang_map if i != "eng"
] + [f"{i}-eng" for i in flores_lang_map if i != "eng"]
flores_datasets = []
for _flores_subtask in _flores_subtasks:
_src, _tgt = _flores_subtask.split("-")
_, _flores_source, _src_inst, _ = flores_lang_map[_src]
_, _flores_target, _tgt_inst, _ = flores_lang_map[_tgt]
flores_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(
begin="</E>",
round=[
dict(
role="HUMAN",
prompt=
f"Translate the following {_src_inst} statements to {_tgt_inst}.\n{{sentence_{_flores_source}}}"
),
dict(role="BOT", prompt=f"{{sentence_{_flores_target}}}"),
],
),
ice_token="</E>",
),
retriever=dict(type=TopkRetriever, ice_num=8),
inferencer=dict(type=GenInferencer),
)
flores_eval_cfg = dict(
evaluator=dict(type=BleuEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="flores"),
dataset_postprocessor=dict(type="flores"),
)
if _tgt == "zho_simpl":
flores_eval_cfg["pred_postprocessor"] = dict(type="flores-chinese")
flores_eval_cfg["dataset_postprocessor"] = dict(type="flores-chinese")
flores_datasets.append(
dict(
type=FloresFirst100Dataset,
abbr=f"flores_100_{_src}-{_tgt}",
name=f"{_flores_source}-{_flores_target}",
reader_cfg=dict(
input_columns=f"sentence_{_flores_source}",
output_column=f"sentence_{_flores_target}",
train_split="dev",
test_split="devtest"),
infer_cfg=flores_infer_cfg.copy(),
eval_cfg=flores_eval_cfg.copy(),
))
del _flores_lang_map, _flores_subtask, _src, _tgt, _, _flores_source, _src_inst, _flores_target, _tgt_inst

View File

@ -0,0 +1,368 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer, GLMChoiceInferencer
from opencompass.datasets import GaokaoBenchDataset
MCQ_TMPL = """\
请你做一道{type}
请你一步一步思考并将思考过程写在[解析]<eoe>之间你将从ABCD中选出正确的答案并写在[答案]<eoa>之间
例如[答案]: A <eoa>
完整的题目回答的格式如下
回答[解析] ... <eoe>
[答案] ... <eoa>
请你严格按照上述格式作答
题目如下{{question}}
回答"""
MULTI_MCQ_TMPL = """\
请你做一道{type}
请你一步一步思考每一题你将从ABCD中选出正确的答案并写在[答案]<eoa>之间
例如1[答案] A <eoa>
(2)[答案] B <eoa>
请你严格按照上述格式作答
题目如下{{question}}
回答"""
CLOZE_TMPL = """\
请你做一道{type}
请你一步一步思考将符合题意的五个选项的字母写在[答案]<eoa>之间
例如[答案] A B C D E <eoa>
请严格按照上述格式作答
题目如下{{question}}
回答"""
_MCQ_prompts = [
{
"type": "single_choice",
"keyword": "2010-2022_Math_II_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='数学选择题'),
"comment": ""
},
{
"type": "single_choice",
"keyword": "2010-2022_Math_I_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='数学选择题'),
"comment": ""
},
{
"type": "single_choice",
"keyword": "2010-2022_History_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='历史选择题'),
},
{
"type": "single_choice",
"keyword": "2010-2022_Biology_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='生物选择题'),
},
{
"type": "single_choice",
"keyword": "2010-2022_Political_Science_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='政治选择题'),
},
{
"type": "multi_choice",
"keyword": "2010-2022_Physics_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='物理选择题'),
},
{
"type": "single_choice",
"keyword": "2010-2022_Chemistry_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='化学选择题'),
},
{
"type": "single_choice",
"keyword": "2010-2013_English_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='英语选择题'),
},
{
"type": "multi_question_choice",
"keyword": "2010-2022_Chinese_Modern_Lit",
"prefix_prompt": MULTI_MCQ_TMPL.format(type='语文阅读理解题,其中包含三个小题'),
},
{
"type": "multi_question_choice",
"keyword": "2010-2022_English_Fill_in_Blanks",
"prefix_prompt": MULTI_MCQ_TMPL.format(type='英语完形填空题,其中包含二十个小题'),
},
{
"type": "five_out_of_seven",
"keyword": "2012-2022_English_Cloze_Test",
"prefix_prompt": CLOZE_TMPL.format(type='英语完形填空题'),
},
{
"type": "multi_question_choice",
"keyword": "2010-2022_Geography_MCQs",
"prefix_prompt": MULTI_MCQ_TMPL.format(type='地理选择题'),
},
{
"type": "multi_question_choice",
"keyword": "2010-2022_English_Reading_Comp",
"prefix_prompt": MULTI_MCQ_TMPL.format(type='英语阅读理解题,其中包含三到五个小题。'),
},
{
"type": "multi_question_choice",
"keyword": "2010-2022_Chinese_Lang_and_Usage_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='语文选择题'),
},
]
_FBQ_prompts = [{
"type": "cloze",
"keyword": "2010-2022_Math_I_Fill-in-the-Blank",
"prefix_prompt":
"请解答下面的数学填空题\n仔细阅读题目,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。\n题目如下:",
"comment": ""
}, {
"type": "cloze",
"keyword": "2010-2022_Math_II_Fill-in-the-Blank",
"prefix_prompt":
"请解答下面的数学填空题\n仔细阅读题目,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。\n题目如下:",
"comment": ""
}, {
"type": "cloze",
"keyword":
"2010-2022_Chinese_Language_Famous_Passages_and_Sentences_Dictation",
"prefix_prompt":
"请回答下面的语文填空题\n请你仔细阅读题目,先找到题目对应的中国名篇,再从名篇中找到合适的句子填写到题目的空白处。请你将思考过程写在【解析】和<eoe>之间,将最终答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n1【解析】 ...<eoe>\n【答案】...<eoa>\n2【解析】 ...<eoe>\n【答案】...<eoa>\n请严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
}, {
"type": "cloze",
"keyword": "2014-2022_English_Language_Cloze_Passage",
"prefix_prompt":
"请回答下面的英语短文填词题\n仔细阅读题目,空白处请填入一个适当单词或者括号内单词的正确形式。请你一步步思考,将思考过程写在【解析】和<eoe>之间,将最终答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n1【解析】 ...<eoe>\n【答案】...<eoa>\n2【解析】 ...<eoe>\n【答案】...<eoa>\n请严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
}]
_OEQ_prompts = [
{
"type": "subjective",
"keyword": "2010-2022_Geography_Open-ended_Questions",
"prefix_prompt":
"请解答下面的地理解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。你的答案请写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chemistry_Open-ended_Questions",
"prefix_prompt":
"请解答下面的化学解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Math_I_Open-ended_Questions",
"prefix_prompt":
"请解答下面的数学解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间,答案需要有完整的解题步骤。\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_History_Open-ended_Questions",
"prefix_prompt":
"请解答下面的历史解答题\n仔细阅读材料和题目,并充分结合你已有的知识,解答其中的问题。请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Biology_Open-ended_Questions",
"prefix_prompt":
"请解答下面的生物解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间,同一小题的答案用\t分隔开。\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...\t...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...\t...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Math_II_Open-ended_Questions",
"prefix_prompt":
"请解答下面的数学解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间,答案需要有完整的解题步骤。\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Physics_Open-ended_Questions",
"prefix_prompt":
"请解答下面的物理解答题,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。\n完整的题目回答格式如下1【解析】 ...<eoe>\n【答案】 ...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Political_Science_Open-ended_Questions",
"prefix_prompt":
"请解答下面的政治解答题\n仔细阅读材料和题目,并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "correction",
"keyword": "2012-2022_English_Language_Error_Correction",
"prefix_prompt":
"请解答下面的英语短文改错题仔细阅读题目并充分结合你你已有的知识找出其中10处需要改动的地方。请你一步步思考把修改后的短文写在【答案】和<eoa>之间。\n完整的题目回答格式如下:【答案】 ...<eoa>\n 请你严格按照上述格式作答。\n题目如下:",
# "prefix_prompt": [
# "请解答下面的英语短文改错题仔细阅读题目并充分结合你你已有的知识找出其中10处需要改动的地方。请你一步步思考把修改后的短文写在【答案】和<eoa>之间。\n完整的题目回答格式如下【答案】 ...<eoa>\n 请你严格按照上述格式作答。\n题目如下:",
# "请比较下面两篇短文找到第二篇和第一篇的10处不同每处不同只涉及一个单词请将结果写在【答案】和<eoa>之间。例如【答案】1. 将play改为plays\n 2.增加了the\n ... <eoa>\n 完整的题目回答格式如下:【答案】(1) ... \n (2) ...\n ...(10) ...\n<eoa>\n请你严格按照上述格式作答。\n短文如下:"
# ],
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chinese_Language_Ancient_Poetry_Reading",
"prefix_prompt":
"请解答下面的语文古代诗歌阅读题,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。\n完整的题目回答格式如下1【答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chinese_Language_Practical_Text_Reading",
"prefix_prompt":
"请解答下面的语文实用类文本阅读,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。\n完整的题目回答格式如下1[答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chinese_Language_Literary_Text_Reading",
"prefix_prompt":
"请解答下面的语文文学类文本阅读,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。\n完整的题目回答格式如下1[答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chinese_Language_Classical_Chinese_Reading",
"prefix_prompt":
"请解答下面的语文文言文阅读,仔细阅读题目,前三题是单选题,最后一题要将文言文翻译为现代汉语。请你一步步思考并把最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。翻译题把翻译后的现代汉语句子写在【答案】后面,例如”【答案】今天天气很好 <eoa>”\n完整的题目回答格式如下1[答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword":
"2010-2022_Chinese_Language_Language_and_Writing_Skills_Open-ended_Questions",
"prefix_prompt":
"请解答下面的语文解答题,仔细阅读题目,注意其中可能含有选择题。请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。\n完整的题目回答格式如下1【解析】 ...<eoe>\n【答案】 ...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
}
]
gaokao_bench_datasets = []
for _folder, _prompts in [
("Multiple-choice_Questions", _MCQ_prompts),
("Fill-in-the-blank_Questions", _FBQ_prompts),
("Open-ended_Questions", _OEQ_prompts),
]:
for _p in _prompts:
if _p['type'] == "single_choice":
continue
_reader_cfg = {
"input_columns": ['question'],
"output_column": 'answer',
}
_infer_cfg = {
"ice_template": {
"type": PromptTemplate,
"template": {
"round": [{
"role":
"HUMAN",
"prompt":
# the FBQ / OEQ prefix prompts carry no {question} placeholder, so fall back to appending the token
(_p['prefix_prompt'].format(question='</question>')
 if '{question}' in _p['prefix_prompt'] else
 _p['prefix_prompt'] + '</question>')
}]
},
"column_token_map": {
"question": "</question>"
},
"ice_token": "</E>"
},
"retriever": {
"type": ZeroRetriever
},
"inferencer": {
"type": GenInferencer
}
}
_eval_cfg = {
"evaluator": {
"type": "GaokaoBenchEvaluator" + "_" + _p['type'],
},
"pred_role": "BOT",
}
_base_path = './data/GAOKAO-BENCH/data'
_dataset = {
"type": GaokaoBenchDataset,
"abbr": "GaokaoBench_" + _p['keyword'],
"path": _base_path + '/' + _folder + '/' + _p['keyword'] + ".json",
"reader_cfg": _reader_cfg,
"infer_cfg": _infer_cfg,
"eval_cfg": _eval_cfg,
}
gaokao_bench_datasets.append(_dataset)
# Single choice dataset
_folder = "Multiple-choice_Questions"
for _p in _MCQ_prompts:
if _p['type'] != "single_choice":
continue
_reader_cfg = {
"input_columns": ['question'],
"output_column": 'answer',
}
_infer_cfg = {
"ice_template": {
"type": PromptTemplate,
"template": {
answer: {
"round": [{
"role": "HUMAN",
"prompt": _p['prefix_prompt'] + '</question>'
}, {
"role": "BOT",
"prompt": f"【答案】{answer} <eoa>"
}]
}
for answer in ['A', 'B', 'C', 'D']
},
"column_token_map": {
"question": "</question>"
},
"ice_token": "</E>"
},
"prompt_template": {
"type": PromptTemplate,
"template": {
"round": [{
"role": "HUMAN",
"prompt": _p['prefix_prompt'] + '</question>'
}, {
"role": "BOT",
"prompt": "【答案】("
}]
},
"column_token_map": {
"question": "</question>"
},
},
"retriever": {
"type": ZeroRetriever
},
"inferencer": {
"type": GLMChoiceInferencer,
"choices": ['A', 'B', 'C', 'D'],
}
}
_eval_cfg = {
"evaluator": {
"type": "GaokaoBenchEvaluator" + "_" + _p['type'],
},
"pred_role": "BOT",
}
_base_path = './data/GAOKAO-BENCH/data'
_dataset = {
"type": GaokaoBenchDataset,
"abbr": "GaokaoBench_" + _p['keyword'],
"path": _base_path + '/' + _folder + '/' + _p['keyword'] + ".json",
"reader_cfg": _reader_cfg,
"infer_cfg": _infer_cfg,
"eval_cfg": _eval_cfg,
}
gaokao_bench_datasets.append(_dataset)
_temporary_variables = [k for k in globals() if k.startswith('_')]
for _t in _temporary_variables:
del globals()[_t]
del _temporary_variables, _t

View File

@ -0,0 +1,32 @@
from mmengine.config import read_base

# Gather every `*_datasets` list exported by the individual dataset configs.
# The variable names below follow the `<config>_datasets` convention used
# throughout these configs.
with read_base():
    from .bustm import bustm_datasets  # noqa: F401, F403
    from .afqmc import afqmc_datasets  # noqa: F401, F403
    from .eprstmt import eprstmt_datasets  # noqa: F401, F403
    from .ocnli_fc import ocnli_fc_datasets  # noqa: F401, F403
    from .ocnli import ocnli_datasets  # noqa: F401, F403
    from .cmnli import cmnli_datasets  # noqa: F401, F403
    from .csl import csl_datasets  # noqa: F401, F403
    from .chid import chid_datasets  # noqa: F401, F403
    from .cluewsc import cluewsc_datasets  # noqa: F401, F403
    from .tnews import tnews_datasets  # noqa: F401, F403
    from .C3 import C3_datasets  # noqa: F401, F403
    from .CMRC import CMRC_datasets  # noqa: F401, F403
    from .DRCD import DRCD_datasets  # noqa: F401, F403
    from .lcsts import lcsts_datasets  # noqa: F401, F403
    from .piqa import piqa_datasets  # noqa: F401, F403
    from .commonsenseqa import commonsenseqa_datasets  # noqa: F401, F403
    from .gsm8k import gsm8k_datasets  # noqa: F401, F403
    from .flores import flores_datasets  # noqa: F401, F403
    from .humaneval import humaneval_datasets  # noqa: F401, F403
    from .mbpp import mbpp_datasets  # noqa: F401, F403
    from .triviaqa import triviaqa_datasets  # noqa: F401, F403
    from .nq import nq_datasets  # noqa: F401, F403
    from .agieval import agieval_datasets  # noqa: F401, F403
    from .mmlu import mmlu_datasets  # noqa: F401, F403
    from .ceval import ceval_datasets  # noqa: F401, F403

datasets = []
for k, v in list(locals().items()):
    if k.endswith("_datasets"):
        datasets += v

View File

@ -0,0 +1,38 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
afqmc_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'],
output_column='label',
test_split='train')
afqmc_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
# 0: "{sentence1},{sentence2}不同。",
# 1: "{sentence1},{sentence2}相似。"
0:
"I received the questions \"{sentence1}\" and \"{sentence2}\". Are they duplicates?[MASK]no",
1:
"I received the questions \"{sentence1}\" and \"{sentence2}\". Are they duplicates?[MASK]yes",
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
afqmc_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
afqmc_datasets = [
dict(
type=HFDataset,
abbr='afqmc',
path='json',
data_files='./data/CLUE/AFQMC/test_public.json',
split='train',
reader_cfg=afqmc_reader_cfg,
infer_cfg=afqmc_infer_cfg,
eval_cfg=afqmc_eval_cfg)
]

View File

@ -0,0 +1,110 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GLMChoiceInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import AGIEvalDataset
agieval_reader_cfg = dict(
input_columns=['problem_input'], output_column='label')
agieval_single_choice_sets = [
'gaokao-chinese',
'gaokao-english',
'gaokao-geography',
'gaokao-history',
'gaokao-biology',
'gaokao-chemistry',
'gaokao-physics',
'gaokao-mathqa',
'logiqa-zh',
'lsat-ar',
'lsat-lr',
'lsat-rc',
'logiqa-en',
'sat-math',
'sat-en',
'sat-en-without-passage',
'aqua-rat',
]
agieval_multiple_choices_sets = [
'jec-qa-kd',  # requires extra data preprocessing
'jec-qa-ca',  # requires extra data preprocessing
]
agieval_cloze_sets = ['gaokao-mathcloze', 'math']
agieval_datasets = []
for name in agieval_single_choice_sets:
agieval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template={
label: f'{{problem_input}} {label}'
for label in ['A', 'B', 'C', 'D']
}),
retriever=dict(type=ZeroRetriever
), # the retriever has no effect here; zero-shot / few-shot is decided by the setting_name argument
inferencer=dict(
type=GLMChoiceInferencer, choices=['A', 'B', 'C', 'D']))
agieval_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
agieval_datasets.append(
dict(
type=AGIEvalDataset,
path='./data/AGIEval/data/v1/',
name=name,
abbr='agieval-' + name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for name in agieval_multiple_choices_sets:
_hint = '答案是: '
agieval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(role='HUMAN', prompt=f'{{question}}\n{{options}}\n{_hint}')
]),
ice_token='</E>'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type='GenInferencer'))
agieval_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='first-capital-multi'))
agieval_datasets.append(
dict(
type='AGIEvalDataset_v2',
path='./data/AGIEval/data/v1/',
name=name,
abbr='agieval-' + name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for name in agieval_cloze_sets:
agieval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template='</E>{problem_input}',
ice_token='</E>'),
retriever=dict(type=ZeroRetriever
), # the retriever has no effect here; zero-shot / few-shot is decided by the setting_name argument
inferencer=dict(type='GenInferencer'))
agieval_eval_cfg = dict(evaluator=dict(type='AGIEvalEvaluator'))
agieval_datasets.append(
dict(
type=AGIEvalDataset,
path='./data/AGIEval/data/v1/',
name=name,
abbr='agieval-' + name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
del name, agieval_infer_cfg, agieval_eval_cfg

View File

@ -0,0 +1,38 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
cmnli_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'],
output_column='label',
test_split='train')
cmnli_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
'contradiction':
'{sentence1}\nKeeping in mind the above text, consider: {sentence2}\nIs this "always", "sometimes", or "never" correct? [MASK]never',
'entailment':
'{sentence1}\nKeeping in mind the above text, consider: {sentence2}\nIs this "always", "sometimes", or "never" correct? [MASK]always',
'neutral':
'{sentence1}\nKeeping in mind the above text, consider: {sentence2}\nIs this "always", "sometimes", or "never" correct? [MASK]sometimes'
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
cmnli_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
cmnli_datasets = [
dict(
type=HFDataset,
abbr='cmnli',
path='json',
split='train',
data_files='./data/CLUE/cmnli/cmnli_public/dev.json',
reader_cfg=cmnli_reader_cfg,
infer_cfg=cmnli_infer_cfg,
eval_cfg=cmnli_eval_cfg)
]

View File

@ -0,0 +1,46 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GLMChoiceInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CslDataset
csl_reader_cfg = dict(
input_columns=["abst", "keywords"], output_column='label')
csl_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template={
0: "</E>摘要:</A>",
1: "</E>摘要:</A>关键词:</K>"
},
column_token_map={
"abst": '</A>',
'keywords': '</K>'
},
ice_token='</E>'),
prompt_template=dict(
type=PromptTemplate,
template=
'</E>Abstract: </A>\nKeyword: </K>\n Do all keywords come from the given abstract? (Yes or No)',
column_token_map={
"abst": '</A>',
'keywords': '</K>'
},
ice_token='</E>'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GLMChoiceInferencer, choices=['No', 'Yes']))
csl_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
csl_datasets = [
dict(
type=CslDataset,
path='json',
abbr='csl',
data_files='./data/FewCLUE/csl/test_public.json',
split='train',
reader_cfg=csl_reader_cfg,
infer_cfg=csl_infer_cfg,
eval_cfg=csl_eval_cfg)
]

View File

@ -0,0 +1,30 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator
humaneval_reader_cfg = dict(
input_columns=['prompt'], output_column='task_id', train_split='test')
# TODO: allow empty output-column
humaneval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template='{prompt}'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
humaneval_eval_cfg = dict(
evaluator=dict(type=HumanEvaluator),
k=[1, 10, 100],  # pass@k values, only used by the HumanEval evaluator
pred_postprocessor=dict(type='humaneval'),
)
humaneval_datasets = [
dict(
type=HFDataset,
path='openai_humaneval',
reader_cfg=humaneval_reader_cfg,
infer_cfg=humaneval_infer_cfg,
eval_cfg=humaneval_eval_cfg)
]

View File

@ -0,0 +1,37 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
ocnli_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'], output_column='label')
# TODO: two prompt templates for ocnli
ocnli_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
'contradiction':
'{sentence1}\nKeeping in mind the above text, consider: {sentence2}\nIs this "always", "sometimes", or "never" correct? [MASK]never',
'entailment':
'{sentence1}\nKeeping in mind the above text, consider: {sentence2}\nIs this "always", "sometimes", or "never" correct? [MASK]always',
'neutral':
'{sentence1}\nKeeping in mind the above text, consider: {sentence2}\nIs this "always", "sometimes", or "never" correct? [MASK]sometimes'
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
ocnli_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )
ocnli_datasets = [
dict(
type=HFDataset,
abbr='ocnli',
path='json',
split='train',
data_files='./data/CLUE/OCNLI/dev.json',
reader_cfg=ocnli_reader_cfg,
infer_cfg=ocnli_infer_cfg,
eval_cfg=ocnli_eval_cfg)
]

View File

@ -0,0 +1,50 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')
gsm8k_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(role='HUMAN', prompt='Q: There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?'),
dict(role='BOT', prompt='A: We start with 15 trees. Later we have 21 trees. The difference must be the number of trees they planted. So, they must have planted 21 - 15 = 6 trees. The answer is 6.\n'),
dict(role='HUMAN', prompt='Q: If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?'),
dict(role='BOT', prompt='A: There are 3 cars in the parking lot already. 2 more arrive. Now there are 3 + 2 = 5 cars. The answer is 5.\n'),
dict(role='HUMAN', prompt='Q: Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?'),
dict(role='BOT', prompt="A: Leah had 32 chocolates and Leah's sister had 42. That means there were originally 32 + 42 = 74 chocolates. 35 have been eaten. So in total they still have 74 - 35 = 39 chocolates. The answer is 39.\n"),
dict(role='HUMAN', prompt='Q: Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops did Jason give to Denny?'),
dict(role='BOT', prompt='A: Jason had 20 lollipops. Since he only has 12 now, he must have given the rest to Denny. The number of lollipops he has given to Denny must have been 20 - 12 = 8 lollipops. The answer is 8.\n'),
dict(role='HUMAN', prompt='Q: Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does he have now?'),
dict(role='BOT', prompt='A: He has 5 toys. He got 2 from mom, so after that he has 5 + 2 = 7 toys. Then he got 2 more from dad, so in total he has 7 + 2 = 9 toys. The answer is 9.\n'),
dict(role='HUMAN', prompt='Q: There were nine computers in the server room. Five more computers were installed each day, from monday to thursday. How many computers are now in the server room?'),
dict(role='BOT', prompt='A: There are 4 days from monday to thursday. 5 computers were added each day. That means in total 4 * 5 = 20 computers were added. There were 9 computers in the beginning, so now there are 9 + 20 = 29 computers. The answer is 29.\n'),
dict(role='HUMAN', prompt='Q: Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf balls did he have at the end of wednesday?'),
dict(role='BOT', prompt='A: Michael initially had 58 balls. He lost 23 on Tuesday, so after that he has 58 - 23 = 35 balls. On Wednesday he lost 2 more so now he has 35 - 2 = 33 balls. The answer is 33.\n'),
dict(role='HUMAN', prompt='Q: Olivia has $23. She bought five bagels for $3 each. How much money does she have left?'),
dict(role='BOT', prompt='A: She bought 5 bagels for $3 each. This means she spent 5 * $3 = $15 on the bagels. She had $23 in beginning, so now she has $23 - $15 = $8. The answer is 8.\n'),
dict(role='HUMAN', prompt='Q: {question}'),
dict(role='BOT', prompt='A: {answer}\n'),
],
)),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
gsm8k_eval_cfg = dict(evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type='gsm8k'),
dataset_postprocessor=dict(type='gsm8k_dataset'))
gsm8k_datasets = [
dict(
type=HFDataset,
path='gsm8k',
name='main',
reader_cfg=gsm8k_reader_cfg,
infer_cfg=gsm8k_infer_cfg,
eval_cfg=gsm8k_eval_cfg)
]
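# The 'gsm8k_dataset' dataset_postprocessor above normalises the references,
# which in the raw GSM8K release end with a "#### <number>" line. A minimal
# sketch of that normalisation (hypothetical helper, not the actual
# opencompass implementation):
def _extract_gold_answer(answer: str) -> str:
    # "Natalia sold ... #### 72" -> "72"
    return answer.split('####')[-1].strip().replace(',', '')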

View File

@ -0,0 +1,51 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')
gsm8k_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(role='HUMAN', prompt='Q: There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?'),
dict(role='BOT', prompt='A: We start with 15 trees. Later we have 21 trees. The difference must be the number of trees they planted. So, they must have planted 21 - 15 = 6 trees. The answer is 6.\n'),
dict(role='HUMAN', prompt='Q: If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?'),
dict(role='BOT', prompt='A: There are 3 cars in the parking lot already. 2 more arrive. Now there are 3 + 2 = 5 cars. The answer is 5.\n'),
dict(role='HUMAN', prompt='Q: Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?'),
dict(role='BOT', prompt="A: Leah had 32 chocolates and Leah's sister had 42. That means there were originally 32 + 42 = 74 chocolates. 35 have been eaten. So in total they still have 74 - 35 = 39 chocolates. The answer is 39.\n"),
dict(role='HUMAN', prompt='Q: Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops did Jason give to Denny?'),
dict(role='BOT', prompt='A: Jason had 20 lollipops. Since he only has 12 now, he must have given the rest to Denny. The number of lollipops he has given to Denny must have been 20 - 12 = 8 lollipops. The answer is 8.\n'),
dict(role='HUMAN', prompt='Q: Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does he have now?'),
dict(role='BOT', prompt='A: He has 5 toys. He got 2 from mom, so after that he has 5 + 2 = 7 toys. Then he got 2 more from dad, so in total he has 7 + 2 = 9 toys. The answer is 9.\n'),
dict(role='HUMAN', prompt='Q: There were nine computers in the server room. Five more computers were installed each day, from monday to thursday. How many computers are now in the server room?'),
dict(role='BOT', prompt='A: There are 4 days from monday to thursday. 5 computers were added each day. That means in total 4 * 5 = 20 computers were added. There were 9 computers in the beginning, so now there are 9 + 20 = 29 computers. The answer is 29.\n'),
dict(role='HUMAN', prompt='Q: Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf balls did he have at the end of wednesday?'),
dict(role='BOT', prompt='A: Michael initially had 58 balls. He lost 23 on Tuesday, so after that he has 58 - 23 = 35 balls. On Wednesday he lost 2 more so now he has 35 - 2 = 33 balls. The answer is 33.\n'),
dict(role='HUMAN', prompt='Q: Olivia has $23. She bought five bagels for $3 each. How much money does she have left?'),
dict(role='BOT', prompt='A: She bought 5 bagels for $3 each. This means she spent 5 * $3 = $15 on the bagels. She had $23 in beginning, so now she has $23 - $15 = $8. The answer is 8.\n'),
dict(role='HUMAN', prompt='Q: {question}'),
dict(role='BOT', prompt='A: {answer}\n'),
],
)),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
gsm8k_eval_cfg = dict(evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type='gsm8k'),
dataset_postprocessor=dict(type='gsm8k_dataset'))
gsm8k_datasets = [
dict(
abbr='gsm8k',
type=HFDataset,
path='gsm8k',
name='main',
reader_cfg=gsm8k_reader_cfg,
infer_cfg=gsm8k_infer_cfg,
eval_cfg=gsm8k_eval_cfg)
]

View File

@ -0,0 +1,88 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')
gsm8k_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=
'''Question: Angelo and Melanie want to plan how many hours over the next week they should study together for their test next week. They have 2 chapters of their textbook to study and 4 worksheets to memorize. They figure out that they should dedicate 3 hours to each chapter of their textbook and 1.5 hours for each worksheet. If they plan to study no more than 4 hours each day, how many days should they plan to study total over the next week if they take a 10-minute break every hour, include 3 10-minute snack breaks each day, and 30 minutes for lunch each day?
Let's think step by step
Answer:
Angelo and Melanie think they should dedicate 3 hours to each of the 2 chapters, 3 hours x 2 chapters = 6 hours total.
For the worksheets they plan to dedicate 1.5 hours for each worksheet, 1.5 hours x 4 worksheets = 6 hours total.
Angelo and Melanie need to start with planning 12 hours to study, at 4 hours a day, 12 / 4 = 3 days.
However, they need to include time for breaks and lunch. Every hour they want to include a 10-minute break, so 12 total hours x 10 minutes = 120 extra minutes for breaks.
They also want to include 3 10-minute snack breaks, 3 x 10 minutes = 30 minutes.
And they want to include 30 minutes for lunch each day, so 120 minutes for breaks + 30 minutes for snack breaks + 30 minutes for lunch = 180 minutes, or 180 / 60 minutes per hour = 3 extra hours.
So Angelo and Melanie want to plan 12 hours to study + 3 hours of breaks = 15 hours total.
They want to study no more than 4 hours each day, 15 hours / 4 hours each day = 3.75
They will need to plan to study 4 days to allow for all the time they need.
The answer is 4
Question: Mark's basketball team scores 25 2 pointers, 8 3 pointers and 10 free throws. Their opponents score double the 2 pointers but half the 3 pointers and free throws. What's the total number of points scored by both teams added together?
Let's think step by step
Answer:
Mark's team scores 25 2 pointers, meaning they scored 25*2= 50 points in 2 pointers.
His team also scores 8 3 pointers, meaning they scored 8*3= 24 points in 3 pointers
They scored 10 free throws, and free throws count as one point so they scored 10*1=10 points in free throws.
All together his team scored 50+24+10= 84 points
Mark's opponents scored double his team's number of 2 pointers, meaning they scored 50*2=100 points in 2 pointers.
His opponents scored half his team's number of 3 pointers, meaning they scored 24/2= 12 points in 3 pointers.
They also scored half Mark's team's points in free throws, meaning they scored 10/2=5 points in free throws.
All together Mark's opponents scored 100+12+5=117 points
The total score for the game is both team's scores added together, so it is 84+117=201 points
The answer is 201
Question: Bella has two times as many marbles as frisbees. She also has 20 more frisbees than deck cards. If she buys 2/5 times more of each item, what would be the total number of the items she will have if she currently has 60 marbles?
Let's think step by step
Answer:
When Bella buys 2/5 times more marbles, she'll have increased the number of marbles by 2/5*60 = 24
The total number of marbles she'll have is 60+24 = 84
If Bella currently has 60 marbles, and she has two times as many marbles as frisbees, she has 60/2 = 30 frisbees.
If Bella buys 2/5 times more frisbees, she'll have 2/5*30 = 12 more frisbees.
The total number of frisbees she'll have will increase to 30+12 = 42
Bella also has 20 more frisbees than deck cards, meaning she has 30-20 = 10 deck cards
If she buys 2/5 times more deck cards, she'll have 2/5*10 = 4 more deck cards.
The total number of deck cards she'll have is 10+4 = 14
Together, Bella will have a total of 14+42+84 = 140 items
The answer is 140
Question: A group of 4 fruit baskets contains 9 apples, 15 oranges, and 14 bananas in the first three baskets and 2 less of each fruit in the fourth basket. How many fruits are there?
Let's think step by step
Answer:
For the first three baskets, the number of apples and oranges in one basket is 9+15=24
In total, together with bananas, the number of fruits in one basket is 24+14=38 for the first three baskets.
Since there are three baskets each having 38 fruits, there are 3*38=114 fruits in the first three baskets.
The number of apples in the fourth basket is 9-2=7
There are also 15-2=13 oranges in the fourth basket
The combined number of oranges and apples in the fourth basket is 13+7=20
The fourth basket also contains 14-2=12 bananas.
In total, the fourth basket has 20+12=32 fruits.
The four baskets together have 32+114=146 fruits.
The answer is 146
Question: {question}{answer}
'''),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
gsm8k_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='gsm8k'),
dataset_postprocessor=dict(type='gsm8k_dataset'))
gsm8k_datasets = [
dict(
abbr='gsm8k',
type=HFDataset,
path='gsm8k',
name='main',
reader_cfg=gsm8k_reader_cfg,
infer_cfg=gsm8k_infer_cfg,
eval_cfg=gsm8k_eval_cfg)
]

View File

@ -0,0 +1,87 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')
gsm8k_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=
'''Question: Angelo and Melanie want to plan how many hours over the next week they should study together for their test next week. They have 2 chapters of their textbook to study and 4 worksheets to memorize. They figure out that they should dedicate 3 hours to each chapter of their textbook and 1.5 hours for each worksheet. If they plan to study no more than 4 hours each day, how many days should they plan to study total over the next week if they take a 10-minute break every hour, include 3 10-minute snack breaks each day, and 30 minutes for lunch each day?
Let's think step by step
Answer:
Angelo and Melanie think they should dedicate 3 hours to each of the 2 chapters, 3 hours x 2 chapters = 6 hours total.
For the worksheets they plan to dedicate 1.5 hours for each worksheet, 1.5 hours x 4 worksheets = 6 hours total.
Angelo and Melanie need to start with planning 12 hours to study, at 4 hours a day, 12 / 4 = 3 days.
However, they need to include time for breaks and lunch. Every hour they want to include a 10-minute break, so 12 total hours x 10 minutes = 120 extra minutes for breaks.
They also want to include 3 10-minute snack breaks, 3 x 10 minutes = 30 minutes.
And they want to include 30 minutes for lunch each day, so 120 minutes for breaks + 30 minutes for snack breaks + 30 minutes for lunch = 180 minutes, or 180 / 60 minutes per hour = 3 extra hours.
So Angelo and Melanie want to plan 12 hours to study + 3 hours of breaks = 15 hours total.
They want to study no more than 4 hours each day, 15 hours / 4 hours each day = 3.75
They will need to plan to study 4 days to allow for all the time they need.
The answer is 4
Question: Mark's basketball team scores 25 2 pointers, 8 3 pointers and 10 free throws. Their opponents score double the 2 pointers but half the 3 pointers and free throws. What's the total number of points scored by both teams added together?
Let's think step by step
Answer:
Mark's team scores 25 2 pointers, meaning they scored 25*2= 50 points in 2 pointers.
His team also scores 8 3 pointers, meaning they scored 8*3= 24 points in 3 pointers
They scored 10 free throws, and free throws count as one point so they scored 10*1=10 points in free throws.
All together his team scored 50+24+10= 84 points
Mark's opponents scored double his team's number of 2 pointers, meaning they scored 50*2=100 points in 2 pointers.
His opponents scored half his team's number of 3 pointers, meaning they scored 24/2= 12 points in 3 pointers.
They also scored half Mark's team's points in free throws, meaning they scored 10/2=5 points in free throws.
All together Mark's opponents scored 100+12+5=117 points
The total score for the game is both team's scores added together, so it is 84+117=201 points
The answer is 201
Question: Bella has two times as many marbles as frisbees. She also has 20 more frisbees than deck cards. If she buys 2/5 times more of each item, what would be the total number of the items she will have if she currently has 60 marbles?
Let's think step by step
Answer:
When Bella buys 2/5 times more marbles, she'll have increased the number of marbles by 2/5*60 = 24
The total number of marbles she'll have is 60+24 = 84
If Bella currently has 60 marbles, and she has two times as many marbles as frisbees, she has 60/2 = 30 frisbees.
If Bella buys 2/5 times more frisbees, she'll have 2/5*30 = 12 more frisbees.
The total number of frisbees she'll have will increase to 30+12 = 42
Bella also has 20 more frisbees than deck cards, meaning she has 30-20 = 10 deck cards
If she buys 2/5 times more deck cards, she'll have 2/5*10 = 4 more deck cards.
The total number of deck cards she'll have is 10+4 = 14
Together, Bella will have a total of 14+42+84 = 140 items
The answer is 140
Question: A group of 4 fruit baskets contains 9 apples, 15 oranges, and 14 bananas in the first three baskets and 2 less of each fruit in the fourth basket. How many fruits are there?
Let's think step by step
Answer:
For the first three baskets, the number of apples and oranges in one basket is 9+15=24
In total, together with bananas, the number of fruits in one basket is 24+14=38 for the first three baskets.
Since there are three baskets each having 38 fruits, there are 3*38=114 fruits in the first three baskets.
The number of apples in the fourth basket is 9-2=7
There are also 15-2=13 oranges in the fourth basket
The combined number of oranges and apples in the fourth basket is 13+7=20
The fourth basket also contains 14-2=12 bananas.
In total, the fourth basket has 20+12=32 fruits.
The four baskets together have 32+114=146 fruits.
The answer is 146
Question: {question}{answer}
'''),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
gsm8k_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='gsm8k'),
dataset_postprocessor=dict(type='gsm8k_dataset'))
gsm8k_datasets = [
dict(
type=HFDataset,
path='gsm8k',
name='main',
reader_cfg=gsm8k_reader_cfg,
infer_cfg=gsm8k_infer_cfg,
eval_cfg=gsm8k_eval_cfg)
]

View File

@ -0,0 +1,35 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import hellaswagDataset
hellaswag_reader_cfg = dict(
input_columns=['ctx', 'A', 'B', 'C', 'D'],
output_column='label',
train_split='validation',
test_split='validation')
hellaswag_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
i: dict(round=[
dict(role="HUMAN", prompt="{ctx}"),
dict(role="BOT", prompt=f"{{{chr(ord('A') + i)}}}"),
])
for i in range(4)
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
hellaswag_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
hellaswag_datasets = [
dict(
type=hellaswagDataset,
path='hellaswag',
reader_cfg=hellaswag_reader_cfg,
infer_cfg=hellaswag_infer_cfg,
eval_cfg=hellaswag_eval_cfg)
]

View File

@ -0,0 +1,34 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import hellaswagDataset
hellaswag_reader_cfg = dict(
input_columns=['ctx', 'A', 'B', 'C', 'D'],
output_column='label',
train_split='validation',
test_split='validation')
hellaswag_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0: "{ctx} {A}",
1: "{ctx} {B}",
2: "{ctx} {C}",
3: "{ctx} {D}",
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
hellaswag_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
hellaswag_datasets = [
dict(
type=hellaswagDataset,
path='hellaswag',
reader_cfg=hellaswag_reader_cfg,
infer_cfg=hellaswag_infer_cfg,
eval_cfg=hellaswag_eval_cfg)
]

View File

@ -0,0 +1,40 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator
humaneval_reader_cfg = dict(
input_columns=['prompt'], output_column='task_id', train_split='test')
# TODO: allow empty output-column
humaneval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin=[
dict(
role='SYSTEM',
fallback_role='HUMAN',
prompt='Complete the following python code:'),
],
round=[
dict(role='HUMAN', prompt='{prompt}'),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
humaneval_eval_cfg = dict(
evaluator=dict(type=HumanEvaluator),
pred_role='BOT',
k=[1, 10, 100],  # pass@k values, only used by the HumanEval evaluator
pred_postprocessor=dict(type='humaneval'),
)
humaneval_datasets = [
dict(
type=HFDataset,
path='openai_humaneval',
reader_cfg=humaneval_reader_cfg,
infer_cfg=humaneval_infer_cfg,
eval_cfg=humaneval_eval_cfg)
]
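# The 'humaneval' pred_postprocessor trims the raw completion before the code
# is executed against the unit tests. A simplified sketch of the kind of
# cleanup involved (hypothetical; the actual opencompass implementation may
# handle more cases):
def _clean_completion(text: str) -> str:
    # Drop a leading markdown fence if the model wrapped its code in one,
    # then cut the completion at the first unrelated top-level statement.
    if text.startswith('```'):
        text = text.split('\n', 1)[-1]
    for stop in ('\nclass ', '\ndef ', '\nif __name__', '\nprint('):
        idx = text.find(stop)
        if idx != -1:
            text = text[:idx]
    return text.rstrip().rstrip('`')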

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .iwslt2017_gen_02ea0b import iwslt2017_datasets # noqa: F401, F403

View File

@ -0,0 +1,40 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import BM25Retriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import BleuEvaluator
from opencompass.datasets import IWSLT2017Dataset
iwslt2017_reader_cfg = dict(
input_columns='en', output_column='de', train_split='validation')
iwslt2017_infer_cfg = dict(
ice_template=dict(type=PromptTemplate,
template=dict(
begin=[
dict(role='SYSTEM', fallback_role="HUMAN", prompt='Please translate the following English statements to German:'),
'</E>',
],
round=[
dict(role='HUMAN', prompt='{en}'),
dict(role='BOT', prompt='{de}'),
]
),
ice_token='</E>'),
retriever=dict(type=BM25Retriever, ice_num=1),
inferencer=dict(type=GenInferencer))
iwslt2017_eval_cfg = dict(
evaluator=dict(type=BleuEvaluator),
pred_role='BOT',
pred_postprocessor=dict(type='general_cn'),
dataset_postprocessor=dict(type='general_cn'))
iwslt2017_datasets = [
dict(
type=IWSLT2017Dataset,
path='iwslt2017',
name='iwslt2017-en-de',
reader_cfg=iwslt2017_reader_cfg,
infer_cfg=iwslt2017_infer_cfg,
eval_cfg=iwslt2017_eval_cfg)
]

View File

@ -0,0 +1,29 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import lambadaDataset, LambadaEvaluator
lambada_reader_cfg = dict(
input_columns=['prompt'],
output_column='label',
train_split='test',
test_split='test')
lambada_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template='Please complete the following sentence: {prompt}'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=5))
lambada_eval_cfg = dict(evaluator=dict(type=LambadaEvaluator))
lambada_datasets = [
dict(
abbr='lambada',
type=lambadaDataset,
path='craffel/openai_lambada',
reader_cfg=lambada_reader_cfg,
infer_cfg=lambada_infer_cfg,
eval_cfg=lambada_eval_cfg)
]

View File

@ -0,0 +1,53 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import MATHDataset, MATHEvaluator
math_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template='''Problem:
Find the domain of the expression $\frac{{\sqrt{{x-2}}}}{{\sqrt{{5-x}}}}$.}}
Solution:
The expressions inside each square root must be non-negative. Therefore, $x-2 \ge 0$, so $x\ge2$, and $5 - x \ge 0$, so $x \le 5$. Also, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of the expression is $\boxed{{[2,5)}}$.
Final Answer: The final answer is $[2,5)$. I hope it is correct.
Problem:
If $\det \mathbf{{A}} = 2$ and $\det \mathbf{{B}} = 12,$ then find $\det (\mathbf{{A}} \mathbf{{B}}).$
Solution:
We have that $\det (\mathbf{{A}} \mathbf{{B}}) = (\det \mathbf{{A}})(\det \mathbf{{B}}) = (2)(12) = \boxed{{24}}.$
Final Answer: The final answer is $24$. I hope it is correct.
Problem:
Terrell usually lifts two 20-pound weights 12 times. If he uses two 15-pound weights instead, how many times must Terrell lift them in order to lift the same total weight?
Solution:
If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\cdot 12\cdot20=480$ pounds of weight. If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\cdot15\cdot n=30n$ pounds of weight. Equating this to 480 pounds, we can solve for $n$: \begin{{align*}} 30n&=480\\ \Rightarrow\qquad n&=480/30=\boxed{{16}} \end{{align*}}
Final Answer: The final answer is $16$. I hope it is correct.
Problem:
If the system of equations: \begin{{align*}} 6x-4y&=a,\\ 6y-9x &=b. \end{{align*}}has a solution $(x, y)$ where $x$ and $y$ are both nonzero, find $\frac{{a}}{{b}},$ assuming $b$ is nonzero.
Solution:
If we multiply the first equation by $-\frac{{3}}{{2}}$, we obtain $$6y-9x=-\frac{{3}}{{2}}a.$$Since we also know that $6y-9x=b$, we have $$-\frac{{3}}{{2}}a=b\Rightarrow\frac{{a}}{{b}}=\boxed{{-\frac{{2}}{{3}}}}.$$
Final Answer: The final answer is $-\frac{{2}}{{3}}$. I hope it is correct.
Problem:
{problem}
Solution:
{solution}'''),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
math_eval_cfg = dict(
evaluator=dict(type=MATHEvaluator), pred_postprocessor=dict(type='math'))
math_datasets = [
dict(
type=MATHDataset,
abbr='math',
path='./data/math/math.json',
reader_cfg=dict(
input_columns=['problem'],
output_column='solution',
),
infer_cfg=math_infer_cfg,
eval_cfg=math_eval_cfg)
]
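Every worked solution in the prompt above ends with a fixed `Final Answer:` sentence, which is what the `math` post-processor keys on before `MATHEvaluator` compares answers. A hedged sketch of that extraction step (the regex and the `\boxed{}` fallback are assumptions for illustration, not the actual post-processor):

```python
# Hedged sketch: recover the final answer from a MATH-style completion that
# follows the "Final Answer: The final answer is $...$." convention above.
import re

def extract_final_answer(completion: str) -> str:
    m = re.search(r"Final Answer: The final answer is \$?(.+?)\$?\. I hope",
                  completion)
    if m:
        return m.group(1).strip()
    # fallback: last \boxed{...} (naive, does not handle nested braces)
    boxed = re.findall(r"\\boxed\{(.+?)\}", completion)
    return boxed[-1].strip() if boxed else completion.strip()

sample = ("Equating 30n with 480 gives n=\\boxed{16}.\n"
          "Final Answer: The final answer is $16$. I hope it is correct.")
print(extract_final_answer(sample))   # -> 16
```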

View File

@ -0,0 +1,64 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import MBPPDataset, MBPPEvaluator
mbpp_reader_cfg = dict(
input_columns=['text', 'test_list'], output_column='code')
mbpp_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(
role="HUMAN",
prompt=
"You are an expert Python programmer, and here is your task: Write a function to find the similar elements from the given two tuple lists. Your code should pass these tests:\n\n assert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5)\n assert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4) \n assert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14) \n"
),
dict(
role="BOT",
prompt=
"[BEGIN]\n 'def similar_elements(test_tup1, test_tup2):\r\n res = tuple(set(test_tup1) & set(test_tup2))\r\n return (res)' \n[DONE] \n\n "
),
dict(
role="HUMAN",
prompt=
"You are an expert Python programmer, and here is your task: Write a python function to identify non-prime numbers. Your code should pass these tests:\n\n assert is_not_prime(2) == False \n assert is_not_prime(10) == True \n assert is_not_prime(35) == True \n"
),
dict(
role="BOT",
prompt=
"[BEGIN]\n 'import math\r\ndef is_not_prime(n):\r\n result = False\r\n for i in range(2,int(math.sqrt(n)) + 1):\r\n if n % i == 0:\r\n result = True\r\n return result' \n[DONE] \n\n "
),
dict(
role="HUMAN",
prompt=
"You are an expert Python programmer, and here is your task: Write a function to find the largest integers from a given list of numbers using heap queue algorithm. Your code should pass these tests:\n\n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35] \n"
),
dict(
role="BOT",
prompt=
"[BEGIN]\n 'import heapq as hq\r\ndef heap_queue_largest(nums,n):\r\n largest_nums = hq.nlargest(n, nums)\r\n return largest_nums' \n[DONE] \n\n "
),
dict(
role="HUMAN",
prompt=
"You are an expert Python programmer, and here is your task: {text} Your code should pass these tests:\n\n {test_list} \n"
),
dict(role="BOT", prompt="[BEGIN]\n"),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
mbpp_eval_cfg = dict(evaluator=dict(type=MBPPEvaluator), pred_role="BOT")
mbpp_datasets = [
dict(
type=MBPPDataset,
abbr='mbpp',
path='./data/mbpp/mbpp.jsonl',
reader_cfg=mbpp_reader_cfg,
infer_cfg=mbpp_infer_cfg,
eval_cfg=mbpp_eval_cfg)
]
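`MBPPEvaluator` judges each completion by whether it satisfies the `test_list` assertions shown in the prompt. A rough, hedged sketch of that check (trusted-code assumption, no sandboxing; the real evaluator is more involved):

```python
# Hedged sketch of an MBPP-style functional check: execute the candidate code
# and then every assert from test_list; any exception counts as a failure.
from typing import Dict, List

def passes_tests(candidate_code: str, test_list: List[str]) -> bool:
    namespace: Dict = {}
    try:
        exec(candidate_code, namespace)      # define the candidate function
        for test in test_list:
            exec(test, namespace)            # each entry is an assert line
        return True
    except Exception:
        return False

print(passes_tests(
    "def is_not_prime(n):\n    return any(n % i == 0 for i in range(2, n))",
    ["assert is_not_prime(10) == True", "assert is_not_prime(2) == False"],
))  # -> True
```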

View File

@ -0,0 +1,123 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import MMLUDataset
# None of the MMLU datasets on HuggingFace are parsed correctly, so we use our own dataset reader
# Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar
mmlu_reader_cfg = dict(
input_columns=["input", "A", "B", "C", "D"],
output_column="target",
train_split='dev')
mmlu_prompt_template = dict(
    type=PromptTemplate,
template=None,
ice_token='</E>')
mmlu_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role='HUMAN',
prompt='{input}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer: '
),
dict(role='BOT', prompt='{target}\n')
])),
prompt_template=mmlu_prompt_template,
retriever=dict(type=FixKRetriever),
inferencer=dict(type=GenInferencer, fix_id_list=[0, 1, 2, 3, 4]))
mmlu_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='first-capital'))
mmlu_all_sets = [
"college_biology",
"college_chemistry",
"college_computer_science",
"college_mathematics",
"college_physics",
"electrical_engineering",
"astronomy",
"anatomy",
"abstract_algebra",
"machine_learning",
"clinical_knowledge",
"global_facts",
"management",
"nutrition",
"marketing",
"professional_accounting",
"high_school_geography",
"international_law",
"moral_scenarios",
"computer_security",
"high_school_microeconomics",
"professional_law",
"medical_genetics",
"professional_psychology",
"jurisprudence",
"world_religions",
"philosophy",
"virology",
"high_school_chemistry",
"public_relations",
"high_school_macroeconomics",
"human_sexuality",
"elementary_mathematics",
"high_school_physics",
"high_school_computer_science",
"high_school_european_history",
"business_ethics",
"moral_disputes",
"high_school_statistics",
"miscellaneous",
"formal_logic",
"high_school_government_and_politics",
"prehistory",
"security_studies",
"high_school_biology",
"logical_fallacies",
"high_school_world_history",
"professional_medicine",
"high_school_mathematics",
"college_medicine",
"high_school_us_history",
"sociology",
"econometrics",
"high_school_psychology",
"human_aging",
"us_foreign_policy",
"conceptual_physics",
]
mmlu_datasets = []
for _name in mmlu_all_sets:
mmlu_datasets.append(
dict(
abbr=f"lukaemon_mmlu_{_name}",
type=MMLUDataset,
path="./data/mmlu/",
name=_name,
reader_cfg=mmlu_reader_cfg,
infer_cfg=mmlu_infer_cfg.copy(),
eval_cfg=mmlu_eval_cfg))
mmlu_datasets[-1]['infer_cfg'][
'prompt_template'] = mmlu_prompt_template.copy()
mmlu_datasets[-1]['infer_cfg']['prompt_template']['template'] = \
dict(
begin=[
dict(role='SYSTEM', fallback_role='HUMAN', prompt=f'The following are multiple choice questions (with answers) about {_name.replace("_", " ")}.'),
'</E>',
],
round=[
dict(role='HUMAN', prompt='{input}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer: '),
]
)
del _name
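The comment at the top of this file points at the Hendrycks data tarball rather than the HuggingFace hub. A hedged sketch of fetching and unpacking it so that `path="./data/mmlu/"` resolves (the layout inside the archive is an assumption, so verify the extracted folders before running):

```python
# Hedged helper for the download step mentioned in the comment above.
# Assumption: the tar extracts CSV splits (dev/val/test); check the layout
# and adjust the MMLUDataset path accordingly.
import tarfile
import urllib.request
from pathlib import Path

URL = "https://people.eecs.berkeley.edu/~hendrycks/data.tar"
dest = Path("./data/mmlu")
dest.mkdir(parents=True, exist_ok=True)

archive = dest / "data.tar"
urllib.request.urlretrieve(URL, archive)
with tarfile.open(archive) as tar:
    tar.extractall(dest)
print("extracted under", dest)
```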

View File

@ -0,0 +1,113 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import MMLUDataset
# None of the MMLU datasets on HuggingFace are parsed correctly, so we use our own dataset reader
# Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar
mmlu_reader_cfg = dict(
input_columns=["input", "A", "B", "C", "D"],
output_column="target",
train_split='dev')
mmlu_all_sets = [
"college_biology",
"college_chemistry",
"college_computer_science",
"college_mathematics",
"college_physics",
"electrical_engineering",
"astronomy",
"anatomy",
"abstract_algebra",
"machine_learning",
"clinical_knowledge",
"global_facts",
"management",
"nutrition",
"marketing",
"professional_accounting",
"high_school_geography",
"international_law",
"moral_scenarios",
"computer_security",
"high_school_microeconomics",
"professional_law",
"medical_genetics",
"professional_psychology",
"jurisprudence",
"world_religions",
"philosophy",
"virology",
"high_school_chemistry",
"public_relations",
"high_school_macroeconomics",
"human_sexuality",
"elementary_mathematics",
"high_school_physics",
"high_school_computer_science",
"high_school_european_history",
"business_ethics",
"moral_disputes",
"high_school_statistics",
"miscellaneous",
"formal_logic",
"high_school_government_and_politics",
"prehistory",
"security_studies",
"high_school_biology",
"logical_fallacies",
"high_school_world_history",
"professional_medicine",
"high_school_mathematics",
"college_medicine",
"high_school_us_history",
"sociology",
"econometrics",
"high_school_psychology",
"human_aging",
"us_foreign_policy",
"conceptual_physics",
]
mmlu_datasets = []
for _name in mmlu_all_sets:
_hint = f'The following are multiple choice questions (with answers) about {_name.replace("_", " ")}.\n\n'
mmlu_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template={
opt:
f"{{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nAnswer: {opt}\n"
for opt in ["A", "B", "C", "D"]
},
),
prompt_template=dict(
type=PromptTemplate,
template={
opt:
f"{_hint}</E>{{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nAnswer: {opt}"
for opt in ["A", "B", "C", "D"]
},
ice_token="</E>",
),
retriever=dict(type=FixKRetriever),
inferencer=dict(type=PPLInferencer, fix_id_list=[0, 1, 2, 3, 4]),
)
mmlu_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )
mmlu_datasets.append(
dict(
abbr=f"lukaemon_mmlu_{_name}",
type=MMLUDataset,
path="./data/mmlu/",
name=_name,
reader_cfg=mmlu_reader_cfg,
infer_cfg=mmlu_infer_cfg,
eval_cfg=mmlu_eval_cfg,
))
del _name, _hint

View File

@ -0,0 +1,27 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import NaturalQuestionDataset, NQEvaluator
nq_reader_cfg = dict(
input_columns=['question'], output_column='answer', train_split='test')
nq_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template="Answer these questions:\nQ: {question}?\nA:{answer}",
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
nq_eval_cfg = dict(evaluator=dict(type=NQEvaluator), pred_role="BOT")
nq_datasets = [
dict(
type=NaturalQuestionDataset,
abbr='nq',
path='./data/nq/',
reader_cfg=nq_reader_cfg,
infer_cfg=nq_infer_cfg,
eval_cfg=nq_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .obqa_gen_b2cde9 import obqa_datasets # noqa: F401, F403

View File

@ -0,0 +1,62 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import OBQADataset
_input_columns = [
["question_stem", "A", "B", "C", "D"],
["question_stem", "A", "B", "C", "D", "fact1"],
]
_template = [
dict(
round=[
dict(
role="HUMAN",
prompt="Question: {question_stem}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer:"
),
], ),
dict(
round=[
dict(
role="HUMAN",
prompt="Given the fact: {fact1}\nQuestion: {question_stem}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer:",
),
], ),
]
obqa_datasets = [
dict(
abbr="openbookqa",
type=OBQADataset,
path="openbookqa",
split="test",
),
dict(
abbr="openbookqa_fact",
type=OBQADataset,
path="openbookqa",
name="additional",
split="test",
),
]
for _i in range(2):
obqa_reader_cfg = dict(
input_columns=_input_columns[_i], output_column="answerKey")
obqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=_template[_i]),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
obqa_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
obqa_datasets[_i]["reader_cfg"] = obqa_reader_cfg
obqa_datasets[_i]["infer_cfg"] = obqa_infer_cfg
obqa_datasets[_i]["eval_cfg"] = obqa_eval_cfg

View File

@ -0,0 +1,36 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import QASPERDataset, TriviaQAEvaluator
qasper_reader_cfg = dict(
input_columns=['question', 'evidence'],
output_column='answer',
train_split='dev',
test_split='dev')
qasper_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(
role='HUMAN',
                prompt='{evidence}\nAnswer these questions:\nQ: {question}?\nA:'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(
type=GenInferencer, max_out_len=50, max_seq_len=8192, batch_size=4))
qasper_eval_cfg = dict(evaluator=dict(type=TriviaQAEvaluator), pred_role='BOT')
qasper_datasets = [
dict(
type=QASPERDataset,
abbr='QASPER',
path='./data/QASPER/',
reader_cfg=qasper_reader_cfg,
infer_cfg=qasper_infer_cfg,
eval_cfg=qasper_eval_cfg)
]

View File

@ -0,0 +1,30 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import QASPERDataset, TriviaQAEvaluator
qasper_reader_cfg = dict(
input_columns=['question', 'evidence'],
output_column='answer',
train_split='dev',
test_split='dev')
qasper_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template="{evidence}\nAnswer these questions:\nQ: {question}?\nA:"),
retriever=dict(type=ZeroRetriever),
inferencer=dict(
type=GenInferencer, max_out_len=50, max_seq_len=8192, batch_size=4))
qasper_eval_cfg = dict(evaluator=dict(type=TriviaQAEvaluator))
qasper_datasets = [
dict(
type=QASPERDataset,
abbr='QASPER',
path='./data/QASPER/',
reader_cfg=qasper_reader_cfg,
infer_cfg=qasper_infer_cfg,
eval_cfg=qasper_eval_cfg)
]

View File

@ -0,0 +1,48 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import RaceDataset
race_reader_cfg = dict(
input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
output_column='answer')
race_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
ans: dict(round=[
dict(
role="HUMAN",
prompt=
"Read the article, and answer the question by replying A, B, C or D.\n\nArticle:\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}"
),
dict(role="BOT", prompt=ans_token),
])
for ans, ans_token in [["A", "{A}"], ["B", "{B}"], ["C", "{C}"],
["D", "{D}"]]
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
race_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
race_datasets = [
dict(
type=RaceDataset,
abbr='race-middle',
path='race',
name='middle',
reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg,
eval_cfg=race_eval_cfg),
dict(
type=RaceDataset,
abbr='race-high',
path='race',
name='high',
reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg,
eval_cfg=race_eval_cfg)
]

View File

@ -0,0 +1,46 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import RaceDataset
race_reader_cfg = dict(
input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
output_column='answer')
race_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
'A':
'Read the article, and answer the question by replying A, B, C or D.\n\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}\n\nAnswer: A',
'B':
'Read the article, and answer the question by replying A, B, C or D.\n\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}\n\nAnswer: B',
'C':
'Read the article, and answer the question by replying A, B, C or D.\n\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}\n\nAnswer: C',
'D':
'Read the article, and answer the question by replying A, B, C or D.\n\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}\n\nAnswer: D',
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
race_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
race_datasets = [
dict(
type=RaceDataset,
abbr='race-middle',
path='race',
name='middle',
reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg,
eval_cfg=race_eval_cfg),
dict(
type=RaceDataset,
abbr='race-high',
path='race',
name='high',
reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg,
eval_cfg=race_eval_cfg)
]

View File

@ -0,0 +1,34 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import ToxicEvaluator
from opencompass.datasets import RealToxicPromptsDataset
realtoxicprompts_reader_cfg = dict(
input_columns=['prompt_text'],
output_column='filename',
train_split='train',
test_split='train')
# TODO: allow empty output-column
realtoxicprompts_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template='{prompt_text}'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
# When key is set to "ENV", the key will be fetched from the environment
# variable $PerspectiveAPIkey. Otherwise, set the key here directly.
realtoxicprompts_eval_cfg = dict(
evaluator=dict(type=ToxicEvaluator, key='ENV'), )
realtoxicprompts_datasets = [
dict(
type=RealToxicPromptsDataset,
path='allenai/real-toxicity-prompts',
challenging_subset=True,
reader_cfg=realtoxicprompts_reader_cfg,
infer_cfg=realtoxicprompts_infer_cfg,
eval_cfg=realtoxicprompts_eval_cfg)
]
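As noted in the comment above, `key='ENV'` defers the Perspective API key to an environment variable. A minimal sketch of that lookup convention (the evaluator's own handling may differ in its details):

```python
# Minimal sketch of the key='ENV' convention described above.
import os

def resolve_perspective_key(key: str) -> str:
    if key == "ENV":
        value = os.environ.get("PerspectiveAPIkey")
        if not value:
            raise ValueError("Set the PerspectiveAPIkey environment variable "
                             "or put the key directly in the config.")
        return value
    return key
```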

View File

@ -0,0 +1,45 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
siqa_reader_cfg = dict(
input_columns=['context', 'question', 'answerA', 'answerB', 'answerC'],
output_column='label',
test_split='validation')
siqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
1:
dict(round=[
dict(role='HUMAN', prompt="{context}\nQuestion: {question}\nAnswer:"),
dict(role='BOT', prompt="{answerA}")
]),
2:
dict(round=[
dict(role='HUMAN', prompt="{context}\nQuestion: {question}\nAnswer:"),
dict(role='BOT', prompt="{answerB}")
]),
3:
dict(round=[
dict(role='HUMAN', prompt="{context}\nQuestion: {question}\nAnswer:"),
dict(role='BOT', prompt="{answerC}")
]),
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
siqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
siqa_datasets = [
dict(
abbr="siqa",
type=HFDataset,
path='social_i_qa',
reader_cfg=siqa_reader_cfg,
infer_cfg=siqa_infer_cfg,
eval_cfg=siqa_eval_cfg)
]

View File

@ -0,0 +1,45 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import storyclozeDataset_V2
storycloze_reader_cfg = dict(
input_columns=["context", "sentence_quiz1", "sentence_quiz2"],
output_column="answer_right_ending",
)
storycloze_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
"{context}\nQuestion: Which ending makes the most sense?\nA. {sentence_quiz1}\nB. {sentence_quiz2}\nYou may choose between 'A' and 'B'.\nAnswer:",
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
storycloze_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
# The original Story Cloze dataset and its repo are no longer maintained,
# so we use the multilingual version of this dataset instead.
storycloze_datasets = [
dict(
abbr="story_cloze",
type=storyclozeDataset_V2,
path="juletxara/xstory_cloze",
name="en",
reader_cfg=storycloze_reader_cfg,
infer_cfg=storycloze_infer_cfg,
eval_cfg=storycloze_eval_cfg,
)
]

View File

@ -0,0 +1,58 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
strategyqa_reader_cfg = dict(
input_columns=['question'],
output_column='answer',
train_split='test',
test_split='test')
strategyqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template='''Yes or no: Q: Do hamsters provide food for any animals?
A: Hamsters are prey animals. Prey are food for predators. Thus, hamsters provide food for some animals.
So the answer is yes.
Q: Yes or no: Could Brooke Shields succeed at University of Pennsylvania?
A: Brooke Shields went to Princeton University. Princeton University is about as academically rigorous as the University of Pennsylvania. Thus, Brooke Shields could also succeed at the University of Pennsylvania.
So the answer is yes.
Q: Yes or no: Hydrogen's atomic number squared exceeds number of Spice Girls?
A: Hydrogen has an atomic number of 1. 1 squared is 1. There are 5 Spice Girls. Thus, Hydrogen's atomic number squared is less than 5.
So the answer is no.
Q: Yes or no: Is it common to see frost during some college commencements?
A: College commencement ceremonies can happen in December, May, and June. December is in the winter, so there can be frost. Thus, there could be frost at some commencements.
So the answer is yes.
Q: Yes or no: Could a llama birth twice during War in Vietnam (1945-46)?
A: The War in Vietnam was 6 months. The gestation period for a llama is 11 months, which is more than 6 months. Thus, a llama could not give birth twice during the War in Vietnam.
So the answer is no.
Q: Yes or no: Would a pear sink in water?
A: The density of a pear is about 0.6g/cm3, which is less than water. Objects less dense than water float. Thus, a pear would float.
So the answer is no.
Q: {question}{answer}
'''),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
strategyqa_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='strategyqa'),
dataset_postprocessor=dict(type='strategyqa_dataset'))
strategyqa_datasets = [
dict(
abbr='strategyqa',
type=HFDataset,
path='wics/strategy-qa',
reader_cfg=strategyqa_reader_cfg,
infer_cfg=strategyqa_infer_cfg,
eval_cfg=strategyqa_eval_cfg)
]
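Both StrategyQA configs rely on a `strategyqa` prediction post-processor that reduces a chain of thought ending in "So the answer is yes/no" to a comparable label. A hedged sketch of that reduction (the real post-processor may normalize differently):

```python
# Hedged sketch: map a chain-of-thought completion to 'yes'/'no' by reading
# the closing "So the answer is ..." sentence used in the few-shot prompt.
import re

def strategyqa_postprocess(completion: str) -> str:
    m = re.search(r"So the answer is (yes|no)", completion, flags=re.IGNORECASE)
    return m.group(1).lower() if m else ""

print(strategyqa_postprocess(
    "A pear is less dense than water, so it floats.\nSo the answer is no"
))   # -> no
```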

View File

@ -0,0 +1,94 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
strategyqa_reader_cfg = dict(
input_columns=['question'],
output_column='answer',
train_split='test',
test_split='test')
strategyqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(
role='HUMAN',
prompt=
'Question: Do hamsters provide food for any animals?\nAnswer:'
),
dict(
role='BOT',
prompt=
'Hamsters are prey animals. Prey are food for predators. Thus, hamsters provide food for some animals.\nSo the answer is yes'
),
dict(
role='HUMAN',
prompt=
'Question: Could Brooke Shields succeed at University of Pennsylvania?\nAnswer:'
),
dict(
role='BOT',
prompt=
'Brooke Shields went to Princeton University. Princeton University is about as academically rigorous as the University of Pennsylvania. Thus, Brooke Shields could also succeed at the University of Pennsylvania.\nSo the answer is yes'
),
dict(
role='HUMAN',
prompt=
'Question: Hydrogen\'s atomic number squared exceeds number of Spice Girls?\nAnswer:'
),
dict(
role='BOT',
prompt=
'Hydrogen has an atomic number of 1. 1 squared is 1. There are 5 Spice Girls. Thus, Hydrogen\'s atomic number squared is less than 5.\nSo the answer is no'
),
dict(
role='HUMAN',
prompt=
'Question: Is it common to see frost during some college commencements?\nAnswer:'
),
dict(
role='BOT',
prompt=
'College commencement ceremonies can happen in December, May, and June. December is in the winter, so there can be frost. Thus, there could be frost at some commencements.\nSo the answer is yes'
),
dict(
role='HUMAN',
prompt=
'Question: Yes or no: Could a llama birth twice during War in Vietnam (1945-46)?\nAnswer:'
),
dict(
role='BOT',
prompt=
'The War in Vietnam was 6 months. The gestation period for a llama is 11 months, which is more than 6 months. Thus, a llama could not give birth twice during the War in Vietnam.\nSo the answer is no'
),
dict(
role='HUMAN',
prompt='Question: Would a pear sink in water?\nAnswer:'),
dict(
role='BOT',
prompt=
'The density of a pear is about 0.6g/cm3, which is less than water. Objects less dense than water float. Thus, a pear would float.\nSo the answer is no'
),
dict(role='HUMAN', prompt='Question: {question}\nAnswer:'),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
strategyqa_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='strategyqa'),
dataset_postprocessor=dict(type='strategyqa_dataset'))
strategyqa_datasets = [
dict(
abbr='strategyqa',
type=HFDataset,
path='wics/strategy-qa',
reader_cfg=strategyqa_reader_cfg,
infer_cfg=strategyqa_infer_cfg,
eval_cfg=strategyqa_eval_cfg)
]

Some files were not shown because too many files have changed in this diff.