[Feature] Use dataset in local path (#570)

* update commonsenseqa

* update drop

* update flores_first100

* update gsm8k

* update humaneval

* update lambada

* update obqa

* update piqa

* update race

* update siqa

* update story_cloze

* update strategyqa

* update tydiqa

* update winogrande

* update doc

* update hellaswag

* fix obqa

* update collections

* update .zip name
Fengzhe Zhou 2023-11-13 13:00:37 +08:00 committed by GitHub
parent d6aaac22e7
commit 689ffe5b63
75 changed files with 726 additions and 444 deletions

View File

@ -83,8 +83,8 @@ git clone https://github.com/open-compass/opencompass opencompass
cd opencompass
pip install -e .
# Download dataset to data/ folder
wget https://github.com/open-compass/opencompass/releases/download/0.1.1/OpenCompassData.zip
unzip OpenCompassData.zip
wget https://github.com/open-compass/opencompass/releases/download/0.1.8.rc1/OpenCompassData-core-20231110.zip
unzip OpenCompassData-core-20231110.zip
```
Some third-party features, like Humaneval and Llama, may require additional steps to work properly; for detailed steps, please refer to the [Installation Guide](https://opencompass.readthedocs.io/en/latest/get_started/installation.html).

View File

@ -85,8 +85,8 @@ git clone https://github.com/open-compass/opencompass opencompass
cd opencompass
pip install -e .
# Download datasets to the data/ folder
wget https://github.com/open-compass/opencompass/releases/download/0.1.1/OpenCompassData.zip
unzip OpenCompassData.zip
wget https://github.com/open-compass/opencompass/releases/download/0.1.8.rc1/OpenCompassData-core-20231110.zip
unzip OpenCompassData-core-20231110.zip
```
Some third-party features, such as Humaneval and Llama, may require additional steps to work properly; for detailed steps, please refer to the [Installation Guide](https://opencompass.readthedocs.io/zh_CN/latest/get_started/installation.html).

View File

@ -52,10 +52,5 @@ with read_base():
from ..nq.nq_gen_c788f6 import nq_datasets
from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
from ..flores.flores_gen_806ede import flores_datasets
from ..crowspairs.crowspairs_ppl_e811e1 import crowspairs_datasets
from ..civilcomments.civilcomments_clp_a3c5fd import civilcomments_datasets
from ..jigsawmultilingual.jigsawmultilingual_clp_fe50d8 import jigsawmultilingual_datasets
from ..realtoxicprompts.realtoxicprompts_gen_7605e4 import realtoxicprompts_datasets
from ..truthfulqa.truthfulqa_gen_5ddc62 import truthfulqa_datasets
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])

View File

@ -52,6 +52,5 @@ with read_base():
from ..nq.nq_gen_0356ec import nq_datasets
from ..triviaqa.triviaqa_gen_0356ec import triviaqa_datasets
from ..flores.flores_gen_806ede import flores_datasets
from ..crowspairs.crowspairs_ppl_e811e1 import crowspairs_datasets
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])

View File

@ -34,6 +34,5 @@ with read_base():
from ..obqa.obqa_ppl_c7c154 import obqa_datasets
from ..nq.nq_gen_c788f6 import nq_datasets
from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
from ..crowspairs.crowspairs_ppl_e811e1 import crowspairs_datasets
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])

View File

@ -52,10 +52,5 @@ with read_base():
from ..nq.nq_gen_c788f6 import nq_datasets
from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
from ..flores.flores_gen_806ede import flores_datasets
from ..crowspairs.crowspairs_gen_381af0 import crowspairs_datasets
from ..civilcomments.civilcomments_clp_a3c5fd import civilcomments_datasets
from ..jigsawmultilingual.jigsawmultilingual_clp_fe50d8 import jigsawmultilingual_datasets
from ..realtoxicprompts.realtoxicprompts_gen_7605e4 import realtoxicprompts_datasets
from ..truthfulqa.truthfulqa_gen_5ddc62 import truthfulqa_datasets
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])

View File

@ -35,6 +35,5 @@ with read_base():
from ..obqa.obqa_gen_9069e4 import obqa_datasets
from ..nq.nq_gen_c788f6 import nq_datasets
from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
from ..crowspairs.crowspairs_gen_381af0 import crowspairs_datasets
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])

View File

@ -50,8 +50,9 @@ commonsenseqa_eval_cfg = dict(
commonsenseqa_datasets = [
dict(
abbr='commonsense_qa',
type=commonsenseqaDataset,
path="commonsense_qa",
path='./data/commonsenseqa',
reader_cfg=commonsenseqa_reader_cfg,
infer_cfg=commonsenseqa_infer_cfg,
eval_cfg=commonsenseqa_eval_cfg,

View File

@ -45,8 +45,9 @@ commonsenseqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
commonsenseqa_datasets = [
dict(
abbr='commonsense_qa',
type=commonsenseqaDataset,
path='commonsense_qa',
path='./data/commonsenseqa',
reader_cfg=commonsenseqa_reader_cfg,
infer_cfg=commonsenseqa_infer_cfg,
eval_cfg=commonsenseqa_eval_cfg)

View File

@ -40,11 +40,10 @@ commonsenseqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
commonsenseqa_datasets = [
dict(
abbr='commonsense_qa',
type=commonsenseqaDataset,
path='commonsense_qa',
path='./data/commonsenseqa',
reader_cfg=commonsenseqa_reader_cfg,
infer_cfg=commonsenseqa_infer_cfg,
eval_cfg=commonsenseqa_eval_cfg)
]
del _ice_template

View File

@ -4,6 +4,11 @@ from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import commonsenseqaDataset
commonsenseqa_reader_cfg = dict(
input_columns=['question', 'A', 'B', 'C', 'D', 'E'],
output_column='answerKey',
test_split='validation')
_ice_template = dict(
type=PromptTemplate,
template={
@ -31,15 +36,10 @@ commonsenseqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
commonsenseqa_datasets = [
dict(
abbr='commonsense_qa',
type=commonsenseqaDataset,
path='commonsense_qa',
reader_cfg=dict(
input_columns=['question', 'A', 'B', 'C', 'D', 'E'],
output_column='answerKey',
test_split='validation',
),
path='./data/commonsenseqa',
reader_cfg=commonsenseqa_reader_cfg,
infer_cfg=commonsenseqa_infer_cfg,
eval_cfg=commonsenseqa_eval_cfg)
]
del _ice_template

View File

@ -4,11 +4,18 @@ from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator
from opencompass.datasets import dropDataset
drop_reader_cfg = dict(
input_columns=['prompt', 'question'],
output_column='answers',
train_split='validation',
test_split='validation',
)
drop_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=
'''Text: In the county, the population was spread out with 23.50% under the age of 18, 8.70% from 18 to 24, 29.70% from 25 to 44, 24.70% from 45 to 64, and 13.30% who were 65 years of age or older.
template='''\
Text: In the county, the population was spread out with 23.50% under the age of 18, 8.70% from 18 to 24, 29.70% from 25 to 44, 24.70% from 45 to 64, and 13.30% who were 65 years of age or older.
Question: How many more percent are under the age of 18 compared to the 18 to 24 group?
Answer: According to the text, 23.5% are under the age of 18, and 8.7% are from ages 18 to 24. 23.5%-8.7%=14.8%. So the answer is 14.8.
@ -30,13 +37,8 @@ drop_datasets = [
dict(
abbr='drop',
type=dropDataset,
path='drop',
reader_cfg=dict(
input_columns=['prompt'],
output_column='answers',
train_split='validation',
test_split='validation',
),
path='./data/drop/drop_dataset_dev.json',
reader_cfg=drop_reader_cfg,
infer_cfg=drop_infer_cfg,
eval_cfg=drop_eval_cfg)
]

View File

@ -118,6 +118,12 @@ for _flores_subtask in _flores_subtasks:
_, _flores_source, _src_inst, _ = flores_lang_map[_src]
_, _flores_target, _tgt_inst, _ = flores_lang_map[_tgt]
flores_reader_cfg = dict(
input_columns=f"sentence_{_flores_source}",
output_column=f"sentence_{_flores_target}",
train_split="dev",
test_split="devtest"
)
flores_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
@ -146,16 +152,11 @@ for _flores_subtask in _flores_subtasks:
flores_eval_cfg["dataset_postprocessor"] = dict(type="flores")
flores_datasets.append(
dict(
type=FloresFirst100Dataset,
abbr=f"flores_100_{_src}-{_tgt}",
type=FloresFirst100Dataset,
path='./data/flores_first100',
name=f"{_flores_source}-{_flores_target}",
reader_cfg=dict(
input_columns=f"sentence_{_flores_source}",
output_column=f"sentence_{_flores_target}",
train_split="dev",
test_split="devtest"),
reader_cfg=flores_reader_cfg.copy(),
infer_cfg=flores_infer_cfg.copy(),
eval_cfg=flores_eval_cfg.copy(),
))
del _flores_lang_map, _flores_subtask, _src, _tgt, _, _flores_source, _src_inst, _flores_target, _tgt_inst

View File

@ -118,6 +118,12 @@ for _flores_subtask in _flores_subtasks:
_, _flores_source, _src_inst, _ = flores_lang_map[_src]
_, _flores_target, _tgt_inst, _ = flores_lang_map[_tgt]
flores_reader_cfg = dict(
input_columns=f"sentence_{_flores_source}",
output_column=f"sentence_{_flores_target}",
train_split="dev",
test_split="devtest"
)
flores_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
@ -139,16 +145,11 @@ for _flores_subtask in _flores_subtasks:
flores_eval_cfg["dataset_postprocessor"] = dict(type="flores-chinese")
flores_datasets.append(
dict(
type=FloresFirst100Dataset,
abbr=f"flores_100_{_src}-{_tgt}",
type=FloresFirst100Dataset,
path='./data/flores_first100',
name=f"{_flores_source}-{_flores_target}",
reader_cfg=dict(
input_columns=f"sentence_{_flores_source}",
output_column=f"sentence_{_flores_target}",
train_split="dev",
test_split="devtest"),
reader_cfg=flores_reader_cfg.copy(),
infer_cfg=flores_infer_cfg.copy(),
eval_cfg=flores_eval_cfg.copy(),
))
del _flores_lang_map, _flores_subtask, _src, _tgt, _, _flores_source, _src_inst, _flores_target, _tgt_inst

View File

@ -1,8 +1,7 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator
from opencompass.datasets import GSM8KDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator
gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')
@ -32,9 +31,8 @@ gsm8k_eval_cfg = dict(evaluator=dict(type=Gsm8kEvaluator),
gsm8k_datasets = [
dict(
abbr='gsm8k',
type=HFDataset,
path='gsm8k',
name='main',
type=GSM8KDataset,
path='./data/gsm8k',
reader_cfg=gsm8k_reader_cfg,
infer_cfg=gsm8k_infer_cfg,
eval_cfg=gsm8k_eval_cfg)

View File

@ -1,8 +1,7 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator
from opencompass.datasets import GSM8KDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator
gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')
@ -79,9 +78,8 @@ gsm8k_eval_cfg = dict(
gsm8k_datasets = [
dict(
abbr='gsm8k',
type=HFDataset,
path='gsm8k',
name='main',
type=GSM8KDataset,
path='./data/gsm8k',
reader_cfg=gsm8k_reader_cfg,
infer_cfg=gsm8k_infer_cfg,
eval_cfg=gsm8k_eval_cfg)

View File

@ -1,7 +1,7 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import AgentInferencer
from opencompass.datasets import HFDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kAgentEvaluator
from opencompass.datasets import GSM8KDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kAgentEvaluator
# This config is for code interpreter
gsm8k_example = """
@ -76,9 +76,8 @@ gsm8k_eval_cfg = dict(
gsm8k_datasets = [
dict(
abbr='gsm8k',
type=HFDataset,
path='gsm8k',
name='main',
type=GSM8KDataset,
path='./data/gsm8k',
reader_cfg=gsm8k_reader_cfg,
infer_cfg=gsm8k_infer_cfg,
eval_cfg=gsm8k_eval_cfg)

View File

@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import SCInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator
from opencompass.datasets import GSM8KDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator
gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer' )
generation_kwargs = dict(do_sample=True, temperature=0.7, top_k=40)
@ -81,9 +81,8 @@ gsm8k_eval_cfg = dict(
gsm8k_datasets = [
dict(
abbr='gsm8k',
type=HFDataset,
path='gsm8k',
name='main',
type=GSM8KDataset,
path='./data/gsm8k',
reader_cfg=gsm8k_reader_cfg,
infer_cfg=gsm8k_infer_cfg,
eval_cfg=gsm8k_eval_cfg)

View File

@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator
from opencompass.datasets import GSM8KDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator
gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')
@ -41,9 +41,9 @@ gsm8k_eval_cfg = dict(evaluator=dict(type=Gsm8kEvaluator),
gsm8k_datasets = [
dict(
type=HFDataset,
path='gsm8k',
name='main',
abbr='gsm8k',
type=GSM8KDataset,
path='./data/gsm8k',
reader_cfg=gsm8k_reader_cfg,
infer_cfg=gsm8k_infer_cfg,
eval_cfg=gsm8k_eval_cfg)

View File

@ -8,7 +8,7 @@ from opencompass.utils.text_postprocessors import first_option_postprocess
hellaswag_reader_cfg = dict(
input_columns=["ctx", "A", "B", "C", "D"],
output_column="label",
test_split="validation")
)
hellaswag_infer_cfg = dict(
prompt_template=dict(
@ -35,8 +35,9 @@ hellaswag_eval_cfg = dict(
hellaswag_datasets = [
dict(
abbr='hellaswag',
type=hellaswagDataset_V2,
path="hellaswag",
path='./data/hellaswag/hellaswag.jsonl',
reader_cfg=hellaswag_reader_cfg,
infer_cfg=hellaswag_infer_cfg,
eval_cfg=hellaswag_eval_cfg)

View File

@ -27,8 +27,9 @@ hellaswag_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
hellaswag_datasets = [
dict(
abbr='hellaswag',
type=hellaswagDataset,
path='hellaswag',
path='./data/hellaswag/hellaswag.jsonl',
reader_cfg=hellaswag_reader_cfg,
infer_cfg=hellaswag_infer_cfg,
eval_cfg=hellaswag_eval_cfg)

View File

@ -6,9 +6,8 @@ from opencompass.datasets import hellaswagDataset
hellaswag_reader_cfg = dict(
input_columns=['ctx', 'A', 'B', 'C', 'D'],
output_column='label',
train_split='validation',
test_split='validation')
output_column='label'
)
hellaswag_infer_cfg = dict(
prompt_template=dict(
@ -26,8 +25,9 @@ hellaswag_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
hellaswag_datasets = [
dict(
abbr='hellaswag',
type=hellaswagDataset,
path='hellaswag',
path='./data/hellaswag/hellaswag.jsonl',
reader_cfg=hellaswag_reader_cfg,
infer_cfg=hellaswag_infer_cfg,
eval_cfg=hellaswag_eval_cfg)

View File

@ -1,7 +1,7 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess
from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess
humaneval_reader_cfg = dict(
input_columns=['prompt'], output_column='task_id', train_split='test')
@ -32,8 +32,9 @@ humaneval_eval_cfg = dict(
humaneval_datasets = [
dict(
type=HFDataset,
path='openai_humaneval',
abbr='openai_humaneval',
type=HumanevalDataset,
path='./data/humaneval/human-eval-v2-20210705.jsonl',
reader_cfg=humaneval_reader_cfg,
infer_cfg=humaneval_infer_cfg,
eval_cfg=humaneval_eval_cfg)

View File

@ -1,7 +1,7 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess
from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess
humaneval_reader_cfg = dict(
input_columns=['prompt'], output_column='task_id', train_split='test')
@ -27,8 +27,9 @@ humaneval_eval_cfg = dict(
humaneval_datasets = [
dict(
type=HFDataset,
path='openai_humaneval',
abbr='openai_humaneval',
type=HumanevalDataset,
path='./data/humaneval/human-eval-v2-20210705.jsonl',
reader_cfg=humaneval_reader_cfg,
infer_cfg=humaneval_infer_cfg,
eval_cfg=humaneval_eval_cfg)

View File

@ -1,7 +1,7 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess
from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess
humaneval_reader_cfg = dict(
input_columns=['prompt'], output_column='task_id', train_split='test')
@ -27,8 +27,9 @@ humaneval_eval_cfg = dict(
humaneval_datasets = [
dict(
type=HFDataset,
path='openai_humaneval',
abbr='openai_humaneval',
type=HumanevalDataset,
path='./data/humaneval/human-eval-v2-20210705.jsonl',
reader_cfg=humaneval_reader_cfg,
infer_cfg=humaneval_infer_cfg,
eval_cfg=humaneval_eval_cfg)

View File

@ -1,7 +1,7 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess
from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess
humaneval_reader_cfg = dict(
input_columns=['prompt'], output_column='task_id', train_split='test')
@ -22,8 +22,9 @@ humaneval_eval_cfg = dict(
humaneval_datasets = [
dict(
type=HFDataset,
path='openai_humaneval',
abbr='openai_humaneval',
type=HumanevalDataset,
path='./data/humaneval/human-eval-v2-20210705.jsonl',
reader_cfg=humaneval_reader_cfg,
infer_cfg=humaneval_infer_cfg,
eval_cfg=humaneval_eval_cfg)

View File

@ -1,7 +1,7 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess
from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess
humaneval_reader_cfg = dict(
input_columns=['prompt'], output_column='task_id', train_split='test')
@ -32,8 +32,9 @@ humaneval_eval_cfg = dict(
humaneval_datasets = [
dict(
type=HFDataset,
path='openai_humaneval',
abbr='openai_humaneval',
type=HumanevalDataset,
path='./data/humaneval/human-eval-v2-20210705.jsonl',
reader_cfg=humaneval_reader_cfg,
infer_cfg=humaneval_infer_cfg,
eval_cfg=humaneval_eval_cfg)

View File

@ -26,7 +26,7 @@ lambada_datasets = [
dict(
abbr='lambada',
type=lambadaDataset,
path='craffel/openai_lambada',
path='./data/lambada/test.jsonl',
reader_cfg=lambada_reader_cfg,
infer_cfg=lambada_infer_cfg,
eval_cfg=lambada_eval_cfg)

View File

@ -22,7 +22,7 @@ lambada_datasets = [
dict(
abbr='lambada',
type=lambadaDataset,
path='craffel/openai_lambada',
path='./data/lambada/test.jsonl',
reader_cfg=lambada_reader_cfg,
infer_cfg=lambada_infer_cfg,
eval_cfg=lambada_eval_cfg)

View File

@ -32,15 +32,12 @@ obqa_datasets = [
dict(
abbr="openbookqa",
type=OBQADataset,
path="openbookqa",
split="test",
path='./data/openbookqa/Main/test.jsonl',
),
dict(
abbr="openbookqa_fact",
type=OBQADataset,
path="openbookqa",
name="additional",
split="test",
path='./data/openbookqa/Additional/test_complete.jsonl',
),
]

View File

@ -24,15 +24,12 @@ obqa_datasets = [
dict(
abbr="openbookqa",
type=OBQADataset,
path="openbookqa",
split="test",
path='./data/openbookqa/Main/test.jsonl',
),
dict(
abbr="openbookqa_fact",
type=OBQADataset,
path="openbookqa",
name="additional",
split="test",
path='./data/openbookqa/Additional/test_complete.jsonl',
),
]
for _i in range(2):

View File

@ -33,9 +33,7 @@ obqa_datasets = [
dict(
abbr='openbookqa_fact',
type=OBQADataset_V2,
path='openbookqa',
name='additional',
split='test',
path='./data/openbookqa/Additional/test_complete.jsonl',
reader_cfg=obqa_reader_cfg,
infer_cfg=obqa_infer_cfg,
eval_cfg=obqa_eval_cfg,

View File

@ -37,16 +37,14 @@ _template = [
obqa_datasets = [
dict(
abbr="openbookqa",
type=OBQADataset,
path='openbookqa',
split='test',
path='./data/openbookqa/Main/test.jsonl',
),
dict(
abbr='openbookqa_fact',
type=OBQADataset,
path='openbookqa',
name='additional',
split='test',
path='./data/openbookqa/Additional/test_complete.jsonl',
),
]
for _i in range(2):

View File

@ -34,7 +34,7 @@ piqa_datasets = [
dict(
abbr="piqa",
type=piqaDataset_V2,
path="piqa",
path='./data/piqa',
reader_cfg=piqa_reader_cfg,
infer_cfg=piqa_infer_cfg,
eval_cfg=piqa_eval_cfg)

View File

@ -30,7 +30,7 @@ piqa_datasets = [
dict(
abbr='piqa',
type=piqaDataset_V3,
path='piqa',
path='./data/piqa',
reader_cfg=piqa_reader_cfg,
infer_cfg=piqa_infer_cfg,
eval_cfg=piqa_eval_cfg)

View File

@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
from opencompass.datasets import piqaDataset
piqa_reader_cfg = dict(
input_columns=['goal', 'sol1', 'sol2'],
@ -23,8 +23,9 @@ piqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
piqa_datasets = [
dict(
type=HFDataset,
path='piqa',
abbr='piqa',
type=piqaDataset,
path='./data/piqa',
reader_cfg=piqa_reader_cfg,
infer_cfg=piqa_infer_cfg,
eval_cfg=piqa_eval_cfg)

View File

@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
from opencompass.datasets import piqaDataset
piqa_reader_cfg = dict(
input_columns=['goal', 'sol1', 'sol2'],
@ -33,8 +33,9 @@ piqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
piqa_datasets = [
dict(
type=HFDataset,
path='piqa',
abbr='piqa',
type=piqaDataset,
path='./data/piqa',
reader_cfg=piqa_reader_cfg,
infer_cfg=piqa_infer_cfg,
eval_cfg=piqa_eval_cfg)

View File

@ -7,7 +7,10 @@ from opencompass.utils.text_postprocessors import first_option_postprocess
race_reader_cfg = dict(
input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
output_column='answer')
output_column='answer',
train_split="validation",
test_split="test"
)
race_infer_cfg = dict(
prompt_template=dict(
@ -29,17 +32,17 @@ race_eval_cfg = dict(
race_datasets = [
dict(
type=RaceDataset,
abbr='race-middle',
path='race',
type=RaceDataset,
path='./data/race',
name='middle',
reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg,
eval_cfg=race_eval_cfg),
dict(
type=RaceDataset,
abbr='race-high',
path='race',
type=RaceDataset,
path='./data/race',
name='high',
reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg,

View File

@ -7,7 +7,10 @@ from opencompass.utils.text_postprocessors import first_capital_postprocess
race_reader_cfg = dict(
input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
output_column='answer')
output_column='answer',
train_split="validation",
test_split="test"
)
race_infer_cfg = dict(
prompt_template=dict(
@ -23,17 +26,17 @@ race_eval_cfg = dict(
race_datasets = [
dict(
type=RaceDataset,
abbr='race-middle',
path='race',
type=RaceDataset,
path='./data/race',
name='middle',
reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg,
eval_cfg=race_eval_cfg),
dict(
type=RaceDataset,
abbr='race-high',
path='race',
type=RaceDataset,
path='./data/race',
name='high',
reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg,

View File

@ -6,7 +6,10 @@ from opencompass.datasets import RaceDataset
race_reader_cfg = dict(
input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
output_column='answer')
output_column='answer',
train_split="validation",
test_split="test"
)
race_infer_cfg = dict(
prompt_template=dict(
@ -27,17 +30,17 @@ race_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
race_datasets = [
dict(
type=RaceDataset,
abbr='race-middle',
path='race',
type=RaceDataset,
path='./data/race',
name='middle',
reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg,
eval_cfg=race_eval_cfg),
dict(
type=RaceDataset,
abbr='race-high',
path='race',
type=RaceDataset,
path='./data/race',
name='high',
reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg,

View File

@ -6,7 +6,10 @@ from opencompass.datasets import RaceDataset
race_reader_cfg = dict(
input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
output_column='answer')
output_column='answer',
train_split="validation",
test_split="test"
)
race_infer_cfg = dict(
prompt_template=dict(
@ -29,17 +32,17 @@ race_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
race_datasets = [
dict(
type=RaceDataset,
abbr='race-middle',
path='race',
type=RaceDataset,
path='./data/race',
name='middle',
reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg,
eval_cfg=race_eval_cfg),
dict(
type=RaceDataset,
abbr='race-high',
path='race',
type=RaceDataset,
path='./data/race',
name='high',
reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg,

View File

@ -6,7 +6,10 @@ from opencompass.datasets import RaceDataset
race_reader_cfg = dict(
input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
output_column='answer')
output_column='answer',
train_split="validation",
test_split="test"
)
race_infer_cfg = dict(
prompt_template=dict(
@ -28,17 +31,17 @@ race_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
race_datasets = [
dict(
type=RaceDataset,
abbr='race-middle',
path='race',
type=RaceDataset,
path='./data/race',
name='middle',
reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg,
eval_cfg=race_eval_cfg),
dict(
type=RaceDataset,
abbr='race-high',
path='race',
type=RaceDataset,
path='./data/race',
name='high',
reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg,

View File

@ -34,7 +34,7 @@ siqa_datasets = [
dict(
abbr="siqa",
type=siqaDataset_V2,
path="social_i_qa",
path='./data/siqa',
reader_cfg=siqa_reader_cfg,
infer_cfg=siqa_infer_cfg,
eval_cfg=siqa_eval_cfg)

View File

@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
from opencompass.datasets import siqaDataset
siqa_reader_cfg = dict(
input_columns=['context', 'question', 'answerA', 'answerB', 'answerC'],
@ -25,8 +25,8 @@ siqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
siqa_datasets = [
dict(
abbr="siqa",
type=HFDataset,
path='social_i_qa',
type=siqaDataset,
path='./data/siqa',
reader_cfg=siqa_reader_cfg,
infer_cfg=siqa_infer_cfg,
eval_cfg=siqa_eval_cfg)

View File

@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
from opencompass.datasets import siqaDataset
siqa_reader_cfg = dict(
input_columns=['context', 'question', 'answerA', 'answerB', 'answerC'],
@ -25,9 +25,8 @@ siqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
siqa_datasets = [
dict(
abbr="siqa",
type=HFDataset,
path='social_i_qa',
name='social_i_qa',
type=siqaDataset,
path='./data/siqa',
reader_cfg=siqa_reader_cfg,
infer_cfg=siqa_infer_cfg,
eval_cfg=siqa_eval_cfg)

View File

@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
from opencompass.datasets import siqaDataset
siqa_reader_cfg = dict(
input_columns=['context', 'question', 'answerA', 'answerB', 'answerC'],
@ -37,8 +37,8 @@ siqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
siqa_datasets = [
dict(
abbr="siqa",
type=HFDataset,
path='social_i_qa',
type=siqaDataset,
path='./data/siqa',
reader_cfg=siqa_reader_cfg,
infer_cfg=siqa_infer_cfg,
eval_cfg=siqa_eval_cfg)

View File

@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
from opencompass.datasets import siqaDataset
siqa_reader_cfg = dict(
input_columns=['context', 'question', 'answerA', 'answerB', 'answerC'],
@ -37,8 +37,8 @@ siqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
siqa_datasets = [
dict(
abbr="siqa",
type=HFDataset,
path='social_i_qa',
type=siqaDataset,
path='./data/siqa',
reader_cfg=siqa_reader_cfg,
infer_cfg=siqa_infer_cfg,
eval_cfg=siqa_eval_cfg)

View File

@ -37,8 +37,8 @@ storycloze_datasets = [
dict(
abbr="story_cloze",
type=storyclozeDataset_V2,
path="juletxara/xstory_cloze",
name="en",
path='./data/xstory_cloze',
lang='en',
reader_cfg=storycloze_reader_cfg,
infer_cfg=storycloze_infer_cfg,
eval_cfg=storycloze_eval_cfg,

View File

@ -31,8 +31,8 @@ storycloze_datasets = [
dict(
abbr='story_cloze',
type=storyclozeDataset,
path='juletxara/xstory_cloze',
name='en',
path='./data/xstory_cloze',
lang='en',
reader_cfg=storycloze_reader_cfg,
infer_cfg=storycloze_infer_cfg,
eval_cfg=storycloze_eval_cfg)

View File

@ -28,8 +28,8 @@ storycloze_datasets = [
dict(
abbr='story_cloze',
type=storyclozeDataset,
path='juletxara/xstory_cloze',
name='en',
path='./data/xstory_cloze',
lang='en',
reader_cfg=storycloze_reader_cfg,
infer_cfg=storycloze_infer_cfg,
eval_cfg=storycloze_eval_cfg)

View File

@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset, strategyqa_pred_postprocess, strategyqa_dataset_postprocess
from opencompass.datasets import StrategyQADataset, strategyqa_pred_postprocess, strategyqa_dataset_postprocess
strategyqa_reader_cfg = dict(
input_columns=['question'],
@ -86,8 +86,8 @@ strategyqa_eval_cfg = dict(
strategyqa_datasets = [
dict(
abbr='strategyqa',
type=HFDataset,
path='wics/strategy-qa',
type=StrategyQADataset,
path='./data/strategyqa/strategyQA_train.json',
reader_cfg=strategyqa_reader_cfg,
infer_cfg=strategyqa_infer_cfg,
eval_cfg=strategyqa_eval_cfg)

View File

@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset, strategyqa_pred_postprocess, strategyqa_dataset_postprocess
from opencompass.datasets import StrategyQADataset, strategyqa_pred_postprocess, strategyqa_dataset_postprocess
strategyqa_reader_cfg = dict(
input_columns=['question'],
@ -50,8 +50,8 @@ strategyqa_eval_cfg = dict(
strategyqa_datasets = [
dict(
abbr='strategyqa',
type=HFDataset,
path='wics/strategy-qa',
type=StrategyQADataset,
path='./data/strategyqa/strategyQA_train.json',
reader_cfg=strategyqa_reader_cfg,
infer_cfg=strategyqa_infer_cfg,
eval_cfg=strategyqa_eval_cfg)

View File

@ -6,9 +6,8 @@ from opencompass.datasets import TydiQADataset, TydiQAEvaluator
# All configs are for TydiQA Goldp task
tydiqa_reader_cfg = dict(
input_columns=["passage_text", "question_text"],
output_column="answer",
test_split='validation',
train_split='validation',)
output_column="answer"
)
langs = ['arabic', 'bengali', 'english', 'finnish', 'indonesian', 'japanese', 'korean', 'russian', 'swahili', 'telugu', 'thai']
@ -33,19 +32,25 @@ for _lang in langs:
prompt_template=dict(
type=PromptTemplate,
template=f"{_hint[0]}\n\n</E>{_hint[1]}{{passage_text}}\n{_hint[2]} {{question_text}}\n{_hint[3]} {{answer}}" ,
ice_token='</E>'),
ice_token='</E>'
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer), max_out_len=50)
inferencer=dict(type=GenInferencer), max_out_len=50
)
tydiqa_eval_cfg = dict(
evaluator=dict(type=TydiQAEvaluator),
ds_split='validation',
ds_column='answer',
)
tydiqa_eval_cfg = dict(evaluator=dict(type=TydiQAEvaluator),
ds_split='validation',
ds_column='answer',
)
tydiqa_datasets.append(
dict(abbr=f'tyidqa-goldp_{_lang}',
type=TydiQADataset,
path='khalidalt/tydiqa-goldp',
name=_lang,
reader_cfg=tydiqa_reader_cfg,
infer_cfg=tydiqa_infer_cfg,
eval_cfg=tydiqa_eval_cfg))
dict(abbr=f'tyidqa-goldp_{_lang}',
type=TydiQADataset,
path='./data/tydiqa',
lang=_lang,
reader_cfg=tydiqa_reader_cfg,
infer_cfg=tydiqa_infer_cfg,
eval_cfg=tydiqa_eval_cfg
)
)

View File

@ -7,8 +7,8 @@ from opencompass.utils.text_postprocessors import first_option_postprocess
winogrande_reader_cfg = dict(
input_columns=["opt1", "opt2"],
output_column="label",
test_split="validation")
output_column="answer",
)
winogrande_infer_cfg = dict(
prompt_template=dict(
@ -35,8 +35,7 @@ winogrande_datasets = [
dict(
abbr="winogrande",
type=winograndeDataset_V2,
path="winogrande",
name="winogrande_xs",
path='./data/winogrande',
reader_cfg=winogrande_reader_cfg,
infer_cfg=winogrande_infer_cfg,
eval_cfg=winogrande_eval_cfg,

View File

@ -7,8 +7,7 @@ from opencompass.datasets import winograndeDataset
winogrande_reader_cfg = dict(
input_columns=['opt1', 'opt2'],
output_column='answer',
train_split='validation',
test_split='validation')
)
winogrande_infer_cfg = dict(
prompt_template=dict(
@ -28,8 +27,7 @@ winogrande_datasets = [
dict(
abbr='winogrande',
type=winograndeDataset,
path='winogrande',
name='winogrande_xs',
path='./data/winogrande',
reader_cfg=winogrande_reader_cfg,
infer_cfg=winogrande_infer_cfg,
eval_cfg=winogrande_eval_cfg)

View File

@ -7,8 +7,7 @@ from opencompass.datasets import winograndeDataset
winogrande_reader_cfg = dict(
input_columns=['opt1', 'opt2'],
output_column='answer',
train_split='validation',
test_split='validation')
)
winogrande_infer_cfg = dict(
prompt_template=dict(
@ -26,8 +25,7 @@ winogrande_datasets = [
dict(
abbr='winogrande',
type=winograndeDataset,
path='winogrande',
name='winogrande_xs',
path='./data/winogrande',
reader_cfg=winogrande_reader_cfg,
infer_cfg=winogrande_infer_cfg,
eval_cfg=winogrande_eval_cfg)

View File

@ -87,17 +87,6 @@ summarizer = dict(
'eprstmt-dev',
'lambada',
'tnews-dev',
'--------- 安全 Safety ---------', # category
# 'bias', # subcategory
'crows_pairs',
# 'toxicity (discriminative)', # subcategory
'civil_comments',
# 'toxicity (discriminative, multilingual)', # subcategory
'jigsaw_multilingual',
# 'toxicity (generative)', # subcategory
'real-toxicity-prompts',
# 'truthfulness / helpfulness', # subcategory
'truthful_qa',
],
summary_groups=sum(
[v for k, v in locals().items() if k.endswith("_summary_groups")], []),

View File

@ -56,8 +56,6 @@ summarizer = dict(
'openbookqa_fact',
'nq',
'triviaqa',
'--- Security ---',
'crows_pairs',
],
summary_groups=sum([v for k, v in locals().items() if k.endswith("_summary_groups")], []),
)

View File

@ -66,10 +66,21 @@ Run the following commands to download and place the datasets in the `${OpenComp
```bash
# Run in the OpenCompass directory
wget https://github.com/open-compass/opencompass/releases/download/0.1.1/OpenCompassData.zip
unzip OpenCompassData.zip
wget https://github.com/open-compass/opencompass/releases/download/0.1.8.rc1/OpenCompassData-core-20231110.zip
unzip OpenCompassData-core-20231110.zip
```
If you need the more comprehensive dataset (~500M) provided by OpenCompass, you can download it using the following commands:
```bash
wget https://github.com/open-compass/opencompass/releases/download/0.1.8.rc1/OpenCompassData-complete-20231110.zip
unzip OpenCompassData-complete-20231110.zip
cd ./data
unzip *.zip
```
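After unzipping, the updated configs in this commit resolve datasets relative to the working directory (e.g. `./data/gsm8k`). A minimal sketch to sanity-check the layout before running an evaluation (folder names are taken from the configs in this commit; the helper itself is illustrative, not part of OpenCompass):

```python
import os

# Folder names referenced by the local-path configs in this commit;
# trim the list to the datasets you actually evaluate.
expected = ['commonsenseqa', 'drop', 'flores_first100', 'gsm8k', 'hellaswag',
            'humaneval', 'lambada', 'openbookqa', 'piqa', 'race', 'siqa',
            'strategyqa', 'tydiqa', 'winogrande', 'xstory_cloze']
missing = [d for d in expected if not os.path.isdir(os.path.join('data', d))]
print('missing folders:', missing or 'none')
```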
The list of datasets included in both `.zip` files can be found [here](https://github.com/open-compass/opencompass/releases/tag/0.1.8.rc1).
OpenCompass supports most of the datasets commonly used for performance comparison; refer to `configs/datasets` for the full list.
As a next step, please read [Quick Start](./quick_start.md).

View File

@ -66,10 +66,21 @@ The datasets supported by OpenCompass mainly consist of two parts:
Run the following commands in the OpenCompass project root to prepare the datasets under the `${OpenCompass}/data` directory:
```bash
wget https://github.com/open-compass/opencompass/releases/download/0.1.1/OpenCompassData.zip
unzip OpenCompassData.zip
wget https://github.com/open-compass/opencompass/releases/download/0.1.8.rc1/OpenCompassData-core-20231110.zip
unzip OpenCompassData-core-20231110.zip
```
If you need the more comprehensive dataset (~500M) provided by OpenCompass, you can download it using the following commands:
```bash
wget https://github.com/open-compass/opencompass/releases/download/0.1.8.rc1/OpenCompassData-complete-20231110.zip
unzip OpenCompassData-complete-20231110.zip
cd ./data
unzip *.zip
```
The list of datasets included in both `.zip` files can be found [here](https://github.com/open-compass/opencompass/releases/tag/0.1.8.rc1).
OpenCompass supports most of the datasets commonly used for performance comparison; the full list can be found under `configs/datasets`.
Next, you can read [Quick Start](./quick_start.md) to learn the basic usage of OpenCompass.

View File

@ -1,4 +1,7 @@
from datasets import load_dataset
import json
import os
from datasets import Dataset, DatasetDict
from opencompass.registry import LOAD_DATASET
@ -9,14 +12,33 @@ from .base import BaseDataset
class commonsenseqaDataset(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
def load(path):
dataset = {}
for split, stub in [
['train', 'train_rand_split.jsonl'],
['validation', 'dev_rand_split.jsonl'],
]:
data_path = os.path.join(path, stub)
dataset_list = []
with open(data_path, 'r', encoding='utf-8') as f:
for line in f:
line = json.loads(line)
dataset_list.append({
'question':
line['question']['stem'],
'A':
line['question']['choices'][0]['text'],
'B':
line['question']['choices'][1]['text'],
'C':
line['question']['choices'][2]['text'],
'D':
line['question']['choices'][3]['text'],
'E':
line['question']['choices'][4]['text'],
'answerKey':
line['answerKey'],
})
dataset[split] = Dataset.from_list(dataset_list)
def pre_process(example):
for i in range(5):
example[chr(ord('A') + i)] = example['choices']['text'][i]
return example
dataset = dataset.map(pre_process).remove_columns(
['question_concept', 'id', 'choices'])
return dataset
return DatasetDict(dataset)
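Each line of `train_rand_split.jsonl` / `dev_rand_split.jsonl` is a nested CommonsenseQA record; the loader above flattens it into the `question`/`A`-`E`/`answerKey` columns the reader configs expect. A self-contained sketch of that flattening (the sample record is invented but follows the official format):

```python
import json

line = json.loads("""{"answerKey": "B",
  "question": {"stem": "Where would you put coins?",
    "choices": [{"label": "A", "text": "oven"}, {"label": "B", "text": "purse"},
                {"label": "C", "text": "lake"}, {"label": "D", "text": "shoe"},
                {"label": "E", "text": "tree"}]}}""")
row = {'question': line['question']['stem'],
       # pair letters A-E with the five choices, in order
       **{letter: choice['text']
          for letter, choice in zip('ABCDE', line['question']['choices'])},
       'answerKey': line['answerKey']}
print(row['B'], row['answerKey'])  # purse B
```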

View File

@ -1,4 +1,6 @@
from datasets import DatasetDict, load_dataset
import json
from datasets import Dataset, DatasetDict
from opencompass.registry import LOAD_DATASET
@ -9,21 +11,37 @@ from .base import BaseDataset
class dropDataset(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs, split='validation')
def get_answers(validated_answers):
answers = []
for answer_item in validated_answers:
if answer_item['number']:
answers.append(answer_item['number'])
elif any(answer_item['date'][i] for i in ['day', 'month', 'year']):
d = [answer_item['date'][i] for i in ['day', 'month', 'year']]
answers.append(' '.join(d).strip())
else:
for span in answer_item['spans']:
answers.append(span)
answers = list(set(answers))
return answers
def pre_process(example):
example['answers'] = example['answers_spans']['spans']
example['prompt'] = example.pop('passage')
return example
@staticmethod
def load(path, only_number=True):
with open(path, 'r', encoding='utf-8') as f:
lines = json.load(f)
dataset_list = []
for line in lines.values():
for qa_pair in line['qa_pairs']:
validated_answers = qa_pair['validated_answers']
if only_number and not any(i['number']
for i in validated_answers):
continue
item = {
'prompt': line['passage'],
'question': qa_pair['question'],
'answers': dropDataset.get_answers(validated_answers),
}
dataset_list.append(item)
def only_number(example):
for i in example['answers_spans']['types']:
if i == 'number':
return True
return False
dataset = dataset.filter(only_number)
dataset = dataset.map(pre_process).remove_columns(
['section_id', 'query_id'])
return DatasetDict({'validation': dataset})
dataset_list = Dataset.from_list(dataset_list)
return DatasetDict({'validation': dataset_list})
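The `get_answers` helper above picks, per validated answer, a number if present, else a day/month/year date joined with spaces, else the raw answer spans, then deduplicates with `set`. A self-contained illustration (inputs invented, following DROP's `validated_answers` schema):

```python
def get_answers(validated_answers):
    # mirrors dropDataset.get_answers: prefer numbers, then dates, then spans
    answers = []
    for item in validated_answers:
        if item['number']:
            answers.append(item['number'])
        elif any(item['date'][k] for k in ['day', 'month', 'year']):
            d = [item['date'][k] for k in ['day', 'month', 'year']]
            answers.append(' '.join(d).strip())
        else:
            answers.extend(item['spans'])
    return list(set(answers))

print(get_answers([
    {'number': '14.8', 'date': {'day': '', 'month': '', 'year': ''}, 'spans': []},
    {'number': '', 'date': {'day': '', 'month': '3', 'year': '1983'}, 'spans': []},
]))  # e.g. ['14.8', '3 1983'] (set order varies)
```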

View File

@ -1,6 +1,7 @@
import os
import re
from datasets import DatasetDict, load_dataset
from datasets import Dataset, DatasetDict
from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS
@ -11,15 +12,30 @@ from .base import BaseDataset
class FloresFirst100Dataset(BaseDataset):
@staticmethod
def load(name):
return DatasetDict({
'dev':
load_dataset(path='facebook/flores', name=name, split='dev'),
'devtest':
load_dataset(path='facebook/flores',
name=name,
split='devtest[:100]')
})
def load_single(src_path, tgt_path, src_lang, tgt_lang):
with open(src_path, 'r', encoding='utf-8') as f:
src_lines = f.readlines()
with open(tgt_path, 'r', encoding='utf-8') as f:
tgt_lines = f.readlines()
assert len(src_lines) == len(tgt_lines)
dataset_list = [{
f'sentence_{src_lang}': src_lines[i].strip(),
f'sentence_{tgt_lang}': tgt_lines[i].strip(),
} for i in range(len(src_lines))]
return Dataset.from_list(dataset_list)
@staticmethod
def load(path, name):
src_lang, tgt_lang = name.split('-')
dev_dataset = FloresFirst100Dataset.load_single(
os.path.join(path, 'dev', f'{src_lang}.dev'),
os.path.join(path, 'dev', f'{tgt_lang}.dev'), src_lang, tgt_lang)
devtest_dataset = FloresFirst100Dataset.load_single(
os.path.join(path, 'devtest', f'{src_lang}.devtest'),
os.path.join(path, 'devtest', f'{tgt_lang}.devtest'), src_lang,
tgt_lang)
return DatasetDict({'dev': dev_dataset, 'devtest': devtest_dataset})
@TEXT_POSTPROCESSORS.register_module('flores')
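The rewritten loader reads parallel plain-text files instead of the `facebook/flores` hub dataset. From the `os.path.join` calls above, the expected layout and a usage sketch look roughly like this (the language pair is illustrative):

```python
# Assumed layout under ./data/flores_first100/:
#     dev/eng.dev            # one sentence per line
#     dev/zho_simpl.dev
#     devtest/eng.devtest
#     devtest/zho_simpl.devtest
from opencompass.datasets import FloresFirst100Dataset

ds = FloresFirst100Dataset.load(path='./data/flores_first100',
                                name='eng-zho_simpl')
print(ds['devtest'][0]['sentence_eng'])
```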

View File

@ -1,5 +1,30 @@
import json
import os
from datasets import Dataset, DatasetDict
from opencompass.openicl import BaseEvaluator
from opencompass.registry import TEXT_POSTPROCESSORS
from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS
from .base import BaseDataset
@LOAD_DATASET.register_module()
class GSM8KDataset(BaseDataset):
@staticmethod
def load(path):
datasets = {}
for split in ['train', 'test']:
split_path = os.path.join(path, split + '.jsonl')
dataset = []
with open(split_path, 'r', encoding='utf-8') as f:
for line in f:
line = json.loads(line.strip())
line['answer']  # touch the field so a malformed line fails fast with KeyError
dataset.append(line)
datasets[split] = Dataset.from_list(dataset)
return DatasetDict(datasets)
@TEXT_POSTPROCESSORS.register_module('gsm8k_dataset')
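With the configs switched from `HFDataset` to `GSM8KDataset`, loading reduces to reading `train.jsonl` and `test.jsonl` under the given folder. A usage sketch (assumes the release data unzipped under `./data`):

```python
from opencompass.datasets import GSM8KDataset

ds = GSM8KDataset.load(path='./data/gsm8k')
print(ds)                         # DatasetDict with 'train' and 'test' splits
print(ds['test'][0]['question'])
```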

View File

@ -1,6 +1,6 @@
import json
from datasets import Dataset, load_dataset
from datasets import Dataset
from opencompass.registry import LOAD_DATASET
@ -11,15 +11,20 @@ from .base import BaseDataset
class hellaswagDataset(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
def preprocess(example):
for i in range(4):
example[chr(ord('A') + i)] = example['endings'][i]
return example
dataset = dataset.map(preprocess).remove_columns(['endings'])
def load(path):
dataset = []
with open(path, 'r', encoding='utf-8') as f:
for line in f:
data = json.loads(line)
dataset.append({
'ctx': data['query'].split(': ', 2)[-1],
'A': data['choices'][0],
'B': data['choices'][1],
'C': data['choices'][2],
'D': data['choices'][3],
'label': data['gold'],
})
dataset = Dataset.from_list(dataset)
return dataset
@ -27,19 +32,20 @@ class hellaswagDataset(BaseDataset):
class hellaswagDataset_V2(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
def preprocess(example):
for i in range(4):
example[chr(ord('A') + i)] = example['endings'][i]
if example['label']:
example['label'] = 'ABCD'[int(example['label'])]
else:
example['label'] = 'NULL'
return example
dataset = dataset.map(preprocess).remove_columns(['endings'])
def load(path):
dataset = []
with open(path, 'r', encoding='utf-8') as f:
for line in f:
data = json.loads(line)
dataset.append({
'ctx': data['query'].split(': ', 1)[-1],
'A': data['choices'][0],
'B': data['choices'][1],
'C': data['choices'][2],
'D': data['choices'][3],
'label': 'ABCD'[data['gold']],
})
dataset = Dataset.from_list(dataset)
return dataset
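Both loaders now read a preprocessed `hellaswag.jsonl` whose records carry `query`, four `choices`, and an integer `gold` index; `hellaswagDataset_V2` maps the index to a letter with `'ABCD'[gold]`. A self-contained sketch of the conversion (the record is invented but uses the same fields):

```python
import json

data = json.loads('{"query": "Roof shingle removal: A man sits on a roof. He",'
                  ' "choices": ["c0", "c1", "c2", "c3"], "gold": 3}')
row = {
    'ctx': data['query'].split(': ', 1)[-1],  # drop the activity-label prefix
    **dict(zip('ABCD', data['choices'])),
    'label': 'ABCD'[data['gold']],            # 3 -> 'D'
}
print(row['ctx'], row['label'])
```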

View File

@ -1,9 +1,27 @@
import json
import os.path as osp
import re
import tempfile
from typing import List
from datasets import Dataset
from opencompass.openicl.icl_evaluator import BaseEvaluator
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class HumanevalDataset(BaseDataset):
@staticmethod
def load(path):
dataset = []
with open(path, 'r', encoding='utf-8') as f:
for line in f:
dataset.append(json.loads(line.strip()))
return Dataset.from_list(dataset)
class HumanEvaluator(BaseEvaluator):
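The local HumanEval file is plain JSONL, one problem per line, so the new loader is a straight read. A usage sketch (field names follow the official HumanEval release):

```python
from opencompass.datasets import HumanevalDataset

ds = HumanevalDataset.load(path='./data/humaneval/human-eval-v2-20210705.jsonl')
print(ds[0]['task_id'])     # e.g. 'HumanEval/0'
print(ds[0]['prompt'][:60])
```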

View File

@ -1,7 +1,8 @@
import json
import re
import string
from datasets import DatasetDict, load_dataset
from datasets import Dataset, DatasetDict
from opencompass.openicl.icl_evaluator import BaseEvaluator
from opencompass.registry import ICL_EVALUATORS, LOAD_DATASET
@ -14,16 +15,12 @@ from .base import BaseDataset
class lambadaDataset(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs, split='test')
def preprocess(example):
prompt, target = example['text'].strip().rsplit(' ', 1)
example['prompt'] = prompt
example['label'] = target
return example
dataset = dataset.map(preprocess)
def load(path):
dataset = []
with open(path, 'r', encoding='utf-8') as f:
for line in f:
dataset.append(json.loads(line))
dataset = Dataset.from_list(dataset)
return DatasetDict({'test': dataset})
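Note the old loader derived `prompt`/`label` on the fly by splitting each example's `text` on the final space; the new one loads `test.jsonl` verbatim, so the file is expected to already carry those final fields. A hedged sketch of one such record (contents invented):

```python
import json
from datasets import Dataset, DatasetDict

# Illustrative record; assumes test.jsonl already holds prompt/label fields.
line = '{"prompt": "He hesitated, then walked into the", "label": "room"}'
ds = DatasetDict({'test': Dataset.from_list([json.loads(line)])})
print(ds['test'][0]['label'])
```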

View File

@ -1,4 +1,6 @@
from datasets import load_dataset
import json
from datasets import Dataset
from opencompass.registry import LOAD_DATASET
@ -9,33 +11,46 @@ from .base import BaseDataset
class OBQADataset(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
def pre_process(example):
for i in range(4):
example[chr(ord('A') + i)] = example['choices']['text'][i]
return example
dataset = dataset.map(pre_process).remove_columns(['id', 'choices'])
return dataset
def load(path):
dataset_list = []
with open(path, 'r') as f:
for line in f:
line = json.loads(line)
item = {
'A': line['question']['choices'][0]['text'],
'B': line['question']['choices'][1]['text'],
'C': line['question']['choices'][2]['text'],
'D': line['question']['choices'][3]['text'],
'question_stem': line['question']['stem'],
'answerKey': line['answerKey'],
}
if 'fact1' in line:
item['fact1'] = line['fact1']
dataset_list.append(item)
return Dataset.from_list(dataset_list)
@LOAD_DATASET.register_module()
class OBQADataset_V2(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
def pre_process(example):
example['A'] = example['choices']['text'][0]
example['B'] = example['choices']['text'][1]
example['C'] = example['choices']['text'][2]
example['D'] = example['choices']['text'][3]
if not example['question_stem'].endswith('?'):
example['question_stem'] += ' what?'
return example
dataset = dataset.map(pre_process).remove_columns(['id', 'choices'])
return dataset
def load(path):
dataset_list = []
with open(path, 'r') as f:
for line in f:
line = json.loads(line)
question = line['question']['stem']
if not question.endswith('?'):
question += ' what?'
item = {
'A': line['question']['choices'][0]['text'],
'B': line['question']['choices'][1]['text'],
'C': line['question']['choices'][2]['text'],
'D': line['question']['choices'][3]['text'],
'question_stem': question,
'answerKey': line['answerKey'],
}
if 'fact1' in line:
item['fact1'] = line['fact1']
dataset_list.append(item)
return Dataset.from_list(dataset_list)
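The two OBQA entries now point at different local files: `Main/test.jsonl` for the plain task and `Additional/test_complete.jsonl`, whose records also carry the supporting `fact1` that the loader passes through when present. A usage sketch:

```python
from opencompass.datasets import OBQADataset

main = OBQADataset.load(path='./data/openbookqa/Main/test.jsonl')
fact = OBQADataset.load(path='./data/openbookqa/Additional/test_complete.jsonl')
print('fact1' in fact.column_names)  # True only for the Additional file
```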

View File

@ -1,50 +1,108 @@
from datasets import load_dataset
import json
import os
from datasets import Dataset, DatasetDict
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class piqaDataset(BaseDataset):
@staticmethod
def load_single(path, data_filename, label_filename):
data_path = os.path.join(path, data_filename)
label_path = os.path.join(path, label_filename)
dataset = []
with open(data_path, 'r', encoding='utf-8') as f:
data_lines = f.readlines()
with open(label_path, 'r', encoding='utf-8') as f:
label_lines = f.readlines()
assert len(data_lines) == len(label_lines)
for data, label in zip(data_lines, label_lines):
i = json.loads(data.strip())
i['label'] = int(label.strip())
dataset.append(i)
return Dataset.from_list(dataset)
@staticmethod
def load(path):
train_dataset = piqaDataset.load_single(path, 'train.jsonl',
'train-labels.lst')
val_dataset = piqaDataset.load_single(path, 'dev.jsonl',
'dev-labels.lst')
return DatasetDict({'train': train_dataset, 'validation': val_dataset})
@LOAD_DATASET.register_module()
class piqaDataset_V2(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
def preprocess(example):
assert isinstance(example['label'], int)
if example['label'] < 0:
example['answer'] = 'NULL'
def load_single(path, data_filename, label_filename):
data_path = os.path.join(path, data_filename)
label_path = os.path.join(path, label_filename)
dataset = []
with open(data_path, 'r', encoding='utf-8') as f:
data_lines = f.readlines()
with open(label_path, 'r', encoding='utf-8') as f:
label_lines = f.readlines()
assert len(data_lines) == len(label_lines)
for data, label in zip(data_lines, label_lines):
i = json.loads(data.strip())
label = int(label.strip())
if label < 0:
i['answer'] = 'NULL'
else:
example['answer'] = 'AB'[example['label']]
example.pop('label')
return example
i['answer'] = 'AB'[label]
dataset.append(i)
dataset = dataset.map(preprocess)
return dataset
return Dataset.from_list(dataset)
@staticmethod
def load(path):
train_dataset = piqaDataset_V2.load_single(path, 'train.jsonl',
'train-labels.lst')
val_dataset = piqaDataset_V2.load_single(path, 'dev.jsonl',
'dev-labels.lst')
return DatasetDict({'train': train_dataset, 'validation': val_dataset})
@LOAD_DATASET.register_module()
class piqaDataset_V3(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
def preprocess(example):
example['goal'] = example['goal'][0].upper() + example['goal'][1:]
if example['goal'].endswith('?') or example['goal'].endswith('.'):
example['sol1'] = example['sol1'][0].upper(
) + example['sol1'][1:]
example['sol2'] = example['sol2'][0].upper(
) + example['sol2'][1:]
def load_single(path, data_filename, label_filename):
data_path = os.path.join(path, data_filename)
label_path = os.path.join(path, label_filename)
dataset = []
with open(data_path, 'r', encoding='utf-8') as f:
data_lines = f.readlines()
with open(label_path, 'r', encoding='utf-8') as f:
label_lines = f.readlines()
assert len(data_lines) == len(label_lines)
for data, label in zip(data_lines, label_lines):
i = json.loads(data.strip())
i['label'] = int(label.strip())
# some preprocessing
i['goal'] = i['goal'][0].upper() + i['goal'][1:]
if i['goal'].endswith('?') or i['goal'].endswith('.'):
i['sol1'] = i['sol1'][0].upper() + i['sol1'][1:]
i['sol2'] = i['sol2'][0].upper() + i['sol2'][1:]
else:
example['sol1'] = example['sol1'][0].lower(
) + example['sol1'][1:]
example['sol2'] = example['sol2'][0].lower(
) + example['sol2'][1:]
return example
i['sol1'] = i['sol1'][0].lower() + i['sol1'][1:]
i['sol2'] = i['sol2'][0].lower() + i['sol2'][1:]
dataset = dataset.map(preprocess)
return dataset
dataset.append(i)
return Dataset.from_list(dataset)
@staticmethod
def load(path):
train_dataset = piqaDataset_V3.load_single(path, 'train.jsonl',
'train-labels.lst')
val_dataset = piqaDataset_V3.load_single(path, 'dev.jsonl',
'dev-labels.lst')
return DatasetDict({'train': train_dataset, 'validation': val_dataset})
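All three PIQA variants share one pattern: zip each `*.jsonl` data line with the matching line of `*-labels.lst`. In `piqaDataset_V2` a negative label (unlabeled rows) becomes `'NULL'`, otherwise `'AB'[label]`. A self-contained sketch of that pairing (file contents invented):

```python
import json

data_lines = ['{"goal": "fix a squeaky hinge", "sol1": "oil it", "sol2": "paint it"}']
label_lines = ['0']  # one label per data line, as in train-labels.lst

rows = []
for data, label in zip(data_lines, label_lines):
    row = json.loads(data)
    label = int(label.strip())
    row['answer'] = 'NULL' if label < 0 else 'AB'[label]
    rows.append(row)
print(rows[0]['answer'])  # 'A'
```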

View File

@ -1,4 +1,7 @@
from datasets import load_dataset
import json
import os
from datasets import Dataset, DatasetDict
from opencompass.registry import LOAD_DATASET
@ -10,12 +13,21 @@ class RaceDataset(BaseDataset):
@staticmethod
def load(path: str, name: str):
dataset = load_dataset(path, name)
def preprocess(x):
for ans, option in zip(['A', 'B', 'C', 'D'], x['options']):
x[ans] = option
del x['options']
return x
return dataset.map(preprocess)
dataset = {}
for split in ['validation', 'test']:
jsonl_path = os.path.join(path, split, f'{name}.jsonl')
dataset_list = []
with open(jsonl_path, 'r', encoding='utf-8') as f:
for line in f:
line = json.loads(line)
dataset_list.append({
'article': line['article'],
'question': line['question'],
'A': line['options'][0],
'B': line['options'][1],
'C': line['options'][2],
'D': line['options'][3],
'answer': line['answer'],
})
dataset[split] = Dataset.from_list(dataset_list)
return DatasetDict(dataset)
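RACE is now read from `<path>/<split>/<name>.jsonl` with one flattened question per line, where `name` is `middle` or `high` as in the configs above. A usage sketch:

```python
from opencompass.datasets import RaceDataset

ds = RaceDataset.load(path='./data/race', name='middle')
print(ds)                       # 'validation' and 'test' splits
print(ds['test'][0]['answer'])  # one of 'A'-'D'
```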

View File

@ -1,4 +1,7 @@
from datasets import load_dataset
import json
import os
from datasets import Dataset, DatasetDict
from opencompass.registry import LOAD_DATASET
@ -6,24 +9,72 @@ from .base import BaseDataset
@LOAD_DATASET.register_module()
class siqaDataset_V2(BaseDataset):
class siqaDataset(BaseDataset):
"""Disconnect from HuggingFace version of HFDataset."""
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
def load_single(path, data_filename, label_filename):
data_path = os.path.join(path, data_filename)
label_path = os.path.join(path, label_filename)
dataset = []
with open(data_path, 'r', encoding='utf-8') as f:
data_lines = f.readlines()
with open(label_path, 'r', encoding='utf-8') as f:
label_lines = f.readlines()
assert len(data_lines) == len(label_lines)
for data, label in zip(data_lines, label_lines):
i = json.loads(data.strip())
i['label'] = int(label.strip())
dataset.append(i)
def preprocess(example):
example['all_labels'] = {
return Dataset.from_list(dataset)
@staticmethod
def load(path):
train_dataset = siqaDataset.load_single(path, 'train.jsonl',
'train-labels.lst')
val_dataset = siqaDataset.load_single(path, 'dev.jsonl',
'dev-labels.lst')
return DatasetDict({'train': train_dataset, 'validation': val_dataset})
@LOAD_DATASET.register_module()
class siqaDataset_V2(BaseDataset):
"""Disconnect from HuggingFace version of siqaDataset_V2."""
@staticmethod
def load_single(path, data_filename, label_filename):
data_path = os.path.join(path, data_filename)
label_path = os.path.join(path, label_filename)
dataset = []
with open(data_path, 'r', encoding='utf-8') as f:
data_lines = f.readlines()
with open(label_path, 'r', encoding='utf-8') as f:
label_lines = f.readlines()
assert len(data_lines) == len(label_lines)
for data, label in zip(data_lines, label_lines):
i = json.loads(data.strip())
label = int(label.strip())
# some preprocessing
i['all_labels'] = {
'candidates': [
f'A. {example["answerA"]}',
f'B. {example["answerB"]}',
f'C. {example["answerC"]}',
[f'A. {i["answerA"]}', 'A', i['answerA']],
[f'B. {i["answerB"]}', 'B', i['answerB']],
[f'C. {i["answerC"]}', 'C', i['answerC']],
],
'label':
int(example['label']) - 1
label - 1
}
example['label'] = ' ABC'[int(example['label'])]
return example
i['label'] = ' ABC'[label]
dataset = dataset.map(preprocess)
return dataset
dataset.append(i)
return Dataset.from_list(dataset)
@staticmethod
def load(path):
train_dataset = siqaDataset_V2.load_single(path, 'train.jsonl',
'train-labels.lst')
val_dataset = siqaDataset_V2.load_single(path, 'dev.jsonl',
'dev-labels.lst')
return DatasetDict({'train': train_dataset, 'validation': val_dataset})
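In `siqaDataset_V2`, each candidate is now stored as three renderings of the same option (`'A. text'`, `'A'`, `'text'`), the 1-based label from the `.lst` file is shifted to 0-based for `all_labels['label']`, and the top-level `label` becomes a letter via `' ABC'[label]`. A self-contained sketch (record invented, SIQA-style fields):

```python
i = {'answerA': 'tell them the truth', 'answerB': 'hide', 'answerC': 'run away'}
label = 2  # 1-based, as read from dev-labels.lst

i['all_labels'] = {
    'candidates': [
        [f'A. {i["answerA"]}', 'A', i['answerA']],
        [f'B. {i["answerB"]}', 'B', i['answerB']],
        [f'C. {i["answerC"]}', 'C', i['answerC']],
    ],
    'label': label - 1,         # 0-based index of the correct candidate
}
i['label'] = ' ABC'[label]      # 2 -> 'B'
print(i['label'], i['all_labels']['label'])
```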

View File

@ -1,4 +1,7 @@
from datasets import DatasetDict, load_dataset
import json
import os
from datasets import Dataset, DatasetDict
from opencompass.registry import LOAD_DATASET
@ -9,38 +12,39 @@ from .base import BaseDataset
class storyclozeDataset(BaseDataset):
@staticmethod
def load(**kwargs):
# special process
dataset = load_dataset(**kwargs, split='train+eval')
def preprocess(example):
example['context'] = ' '.join([
example['input_sentence_1'], example['input_sentence_2'],
example['input_sentence_3'], example['input_sentence_4']
])
return example
dataset = dataset.map(preprocess)
return DatasetDict({'test': dataset})
def load(path, lang):
dataset_list = []
for split in ['train', 'eval']:
split_path = os.path.join(path, f'{lang}_{split}.jsonl')
with open(split_path, 'r', encoding='utf-8') as f:
for line in f:
line = json.loads(line)
line['context'] = ' '.join([
line['input_sentence_1'], line['input_sentence_2'],
line['input_sentence_3'], line['input_sentence_4']
])
dataset_list.append(line)
dataset_list = Dataset.from_list(dataset_list)
return DatasetDict({'test': dataset_list})
@LOAD_DATASET.register_module()
class storyclozeDataset_V2(BaseDataset):
@staticmethod
def load(**kwargs):
# special process
dataset = load_dataset(**kwargs, split='train+eval')
def preprocess(example):
example['context'] = ' '.join([
example['input_sentence_1'], example['input_sentence_2'],
example['input_sentence_3'], example['input_sentence_4']
])
example['answer_right_ending'] = ' AB'[
example['answer_right_ending']]
return example
dataset = dataset.map(preprocess)
return dataset
def load(path, lang):
dataset_list = []
for split in ['train', 'eval']:
split_path = os.path.join(path, f'{lang}_{split}.jsonl')
with open(split_path, 'r', encoding='utf-8') as f:
for line in f:
line = json.loads(line)
line['context'] = ' '.join([
line['input_sentence_1'], line['input_sentence_2'],
line['input_sentence_3'], line['input_sentence_4']
])
line['answer_right_ending'] = ' AB'[
line['answer_right_ending']]
dataset_list.append(line)
dataset_list = Dataset.from_list(dataset_list)
return dataset_list
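The local loader concatenates `{lang}_train.jsonl` and `{lang}_eval.jsonl` into a single `test` split, reproducing the old `split='train+eval'` behavior, and joins the four context sentences into one `context` field. A usage sketch:

```python
from opencompass.datasets import storyclozeDataset

ds = storyclozeDataset.load(path='./data/xstory_cloze', lang='en')
print(ds['test'][0]['context'])  # four input sentences joined with spaces
```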

View File

@ -1,6 +1,11 @@
import json
import re
from opencompass.registry import TEXT_POSTPROCESSORS
from datasets import Dataset
from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS
from .base import BaseDataset
@TEXT_POSTPROCESSORS.register_module('strategyqa')
@ -16,3 +21,13 @@ def strategyqa_pred_postprocess(text: str) -> str:
@TEXT_POSTPROCESSORS.register_module('strategyqa_dataset')
def strategyqa_dataset_postprocess(text: str) -> str:
return 'yes' if str(text) == 'True' else 'no'
@LOAD_DATASET.register_module()
class StrategyQADataset(BaseDataset):
@staticmethod
def load(path):
with open(path, 'r', encoding='utf-8') as f:
dataset = json.load(f)
return Dataset.from_list(dataset)
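`StrategyQADataset.load` is just `json.load` over the single training file plus `Dataset.from_list`. The companion `strategyqa_dataset_postprocess` above maps the boolean gold answer onto 'yes'/'no', which a quick check confirms:

```python
from opencompass.datasets import strategyqa_dataset_postprocess

print(strategyqa_dataset_postprocess(True))   # 'yes'
print(strategyqa_dataset_postprocess(False))  # 'no'
```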

View File

@ -1,7 +1,9 @@
import json
import os
import re
from collections import Counter
from datasets import load_dataset
from datasets import Dataset
from opencompass.openicl.icl_evaluator import BaseEvaluator
from opencompass.utils.text_postprocessors import general_postprocess
@ -12,15 +14,16 @@ from .base import BaseDataset
class TydiQADataset(BaseDataset):
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
def pre_process(example):
example['answer'] = example['answers']['text']
return example
dataset = dataset.map(pre_process).remove_columns(['id', 'answers'])
return dataset
def load(path, lang):
path = os.path.join(path, 'dev', f'{lang}-dev.jsonl')
dataset_list = []
with open(path, 'r', encoding='utf-8') as f:
for line in f:
line = json.loads(line)
answer = list(set([i['text'] for i in line['answers']]))
line['answer'] = answer
dataset_list.append(line)
return Dataset.from_list(dataset_list)
class TydiQAEvaluator(BaseEvaluator):
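Per-language files live at `<path>/dev/<lang>-dev.jsonl`, and the loader collapses duplicate gold answers with `set` before storing them in `answer`. A usage sketch (`lang` must be one of the entries in the `langs` list above):

```python
from opencompass.datasets import TydiQADataset

ds = TydiQADataset.load(path='./data/tydiqa', lang='english')
print(ds[0]['answer'])  # deduplicated list of gold answer strings
```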

View File

@ -1,4 +1,7 @@
from datasets import load_dataset
import json
import os
from datasets import Dataset
from opencompass.registry import LOAD_DATASET
@ -7,38 +10,49 @@ from .base import BaseDataset
@LOAD_DATASET.register_module()
class winograndeDataset(BaseDataset):
"""Disconnect from Huggingface, winograndeDataset."""
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
def preprocess(example):
prompt = example.pop('sentence')
example['opt1'] = prompt.replace('_', example.pop('option1'))
example['opt2'] = prompt.replace('_', example.pop('option2'))
return example
return dataset.map(preprocess)
def load(path):
path = os.path.join(path, 'dev.jsonl')
dataset_list = []
with open(path, 'r', encoding='utf-8') as f:
for line in f:
line = json.loads(line)
prompt = line['sentence']
dataset_list.append({
'opt1':
prompt.replace('_', line['option1']),
'opt2':
prompt.replace('_', line['option2']),
'answer':
line['answer']
})
dataset_list = Dataset.from_list(dataset_list)
return dataset_list
@LOAD_DATASET.register_module()
class winograndeDataset_V2(BaseDataset):
"""Disconnect from Huggingface, winograndeDataset_V2."""
@staticmethod
def load(**kwargs):
dataset = load_dataset(**kwargs)
def preprocess(example):
prompt = example.pop('sentence')
example['opt1'] = prompt.replace('_', example.pop('option1'))
example['opt2'] = prompt.replace('_', example.pop('option2'))
answer = example.pop('answer')
if answer == '':
example['label'] = 'NULL'
else:
example['label'] = ' AB'[int(answer)]
return example
return dataset.map(preprocess)
def load(path):
path = os.path.join(path, 'dev.jsonl')
dataset_list = []
with open(path, 'r', encoding='utf-8') as f:
for line in f:
line = json.loads(line)
prompt = line['sentence']
answer = line['answer']
answer = ' AB'[int(answer)] if answer != '' else 'NULL'
dataset_list.append({
'opt1':
prompt.replace('_', line['option1']),
'opt2':
prompt.replace('_', line['option2']),
'answer':
answer
})
dataset_list = Dataset.from_list(dataset_list)
return dataset_list
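Finally, both WinoGrande loaders read `dev.jsonl` and build the two candidates by substituting each option into the sentence's `_` blank; `winograndeDataset_V2` maps the answer with `' AB'` and falls back to `'NULL'` for unlabeled rows. A self-contained sketch of the substitution (record invented, WinoGrande-style):

```python
line = {'sentence': 'The trophy would not fit in the suitcase because _ was too big.',
        'option1': 'the trophy', 'option2': 'the suitcase', 'answer': '1'}

row = {
    'opt1': line['sentence'].replace('_', line['option1']),
    'opt2': line['sentence'].replace('_', line['option2']),
    'label': ' AB'[int(line['answer'])] if line['answer'] != '' else 'NULL',
}
print(row['label'])  # '1' -> 'A'
print(row['opt1'])
```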