[Feature] Use dataset in local path (#570)

* update commonsenseqa

* update drop

* update flores_first100

* update gsm8k

* update humaneval

* update lambada

* update obqa

* update piqa

* update race

* update siqa

* update story_cloze

* update strategyqa

* update tydiqa

* update winogrande

* update doc

* update hellaswag

* fix obqa

* update collections

* update .zip name
Author: Fengzhe Zhou, 2023-11-13 13:00:37 +08:00, committed by GitHub
parent d6aaac22e7
commit 689ffe5b63
75 changed files with 726 additions and 444 deletions

View File

@@ -83,8 +83,8 @@ git clone https://github.com/open-compass/opencompass opencompass
 cd opencompass
 pip install -e .
 # Download dataset to data/ folder
-wget https://github.com/open-compass/opencompass/releases/download/0.1.1/OpenCompassData.zip
-unzip OpenCompassData.zip
+wget https://github.com/open-compass/opencompass/releases/download/0.1.8.rc1/OpenCompassData-core-20231110.zip
+unzip OpenCompassData-core-20231110.zip
 ```
 Some third-party features, like Humaneval and Llama, may require additional steps to work properly; for detailed steps please refer to the [Installation Guide](https://opencompass.readthedocs.io/en/latest/get_started/installation.html).
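For readers who prefer to script this step, a minimal Python sketch equivalent to the `wget`/`unzip` pair above might look like the following. The URL is the release asset referenced in this diff; the archive is assumed to unpack a `data/` folder into the current directory:

```python
import urllib.request
import zipfile

# Release asset referenced in the updated README.
url = ("https://github.com/open-compass/opencompass/releases/download/"
       "0.1.8.rc1/OpenCompassData-core-20231110.zip")
archive = url.rsplit("/", 1)[-1]

urllib.request.urlretrieve(url, archive)  # same as the wget line
with zipfile.ZipFile(archive) as zf:
    zf.extractall(".")                    # same as the unzip line
```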

View File

@@ -85,8 +85,8 @@ git clone https://github.com/open-compass/opencompass opencompass
 cd opencompass
 pip install -e .
 # Download the dataset to data/
-wget https://github.com/open-compass/opencompass/releases/download/0.1.1/OpenCompassData.zip
-unzip OpenCompassData.zip
+wget https://github.com/open-compass/opencompass/releases/download/0.1.8.rc1/OpenCompassData-core-20231110.zip
+unzip OpenCompassData-core-20231110.zip
 ```
 Some third-party features, such as Humaneval and Llama, may require additional steps to run properly; for detailed steps please refer to the [Installation Guide](https://opencompass.readthedocs.io/zh_CN/latest/get_started/installation.html).

View File

@@ -52,10 +52,5 @@ with read_base():
     from ..nq.nq_gen_c788f6 import nq_datasets
     from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
     from ..flores.flores_gen_806ede import flores_datasets
-    from ..crowspairs.crowspairs_ppl_e811e1 import crowspairs_datasets
-    from ..civilcomments.civilcomments_clp_a3c5fd import civilcomments_datasets
-    from ..jigsawmultilingual.jigsawmultilingual_clp_fe50d8 import jigsawmultilingual_datasets
-    from ..realtoxicprompts.realtoxicprompts_gen_7605e4 import realtoxicprompts_datasets
-    from ..truthfulqa.truthfulqa_gen_5ddc62 import truthfulqa_datasets
 
 datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])

View File

@@ -52,6 +52,5 @@ with read_base():
     from ..nq.nq_gen_0356ec import nq_datasets
     from ..triviaqa.triviaqa_gen_0356ec import triviaqa_datasets
     from ..flores.flores_gen_806ede import flores_datasets
-    from ..crowspairs.crowspairs_ppl_e811e1 import crowspairs_datasets
 
 datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])

View File

@@ -34,6 +34,5 @@ with read_base():
     from ..obqa.obqa_ppl_c7c154 import obqa_datasets
     from ..nq.nq_gen_c788f6 import nq_datasets
     from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
-    from ..crowspairs.crowspairs_ppl_e811e1 import crowspairs_datasets
 
 datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])

View File

@@ -52,10 +52,5 @@ with read_base():
     from ..nq.nq_gen_c788f6 import nq_datasets
     from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
     from ..flores.flores_gen_806ede import flores_datasets
-    from ..crowspairs.crowspairs_gen_381af0 import crowspairs_datasets
-    from ..civilcomments.civilcomments_clp_a3c5fd import civilcomments_datasets
-    from ..jigsawmultilingual.jigsawmultilingual_clp_fe50d8 import jigsawmultilingual_datasets
-    from ..realtoxicprompts.realtoxicprompts_gen_7605e4 import realtoxicprompts_datasets
-    from ..truthfulqa.truthfulqa_gen_5ddc62 import truthfulqa_datasets
 
 datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])

View File

@@ -35,6 +35,5 @@ with read_base():
     from ..obqa.obqa_gen_9069e4 import obqa_datasets
     from ..nq.nq_gen_c788f6 import nq_datasets
     from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
-    from ..crowspairs.crowspairs_gen_381af0 import crowspairs_datasets
 
 datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])

View File

@@ -50,8 +50,8 @@ commonsenseqa_eval_cfg = dict(
 commonsenseqa_datasets = [
     dict(
+        abbr='commonsense_qa',
         type=commonsenseqaDataset,
-        path="commonsense_qa",
+        path='./data/commonsenseqa',
         reader_cfg=commonsenseqa_reader_cfg,
         infer_cfg=commonsenseqa_infer_cfg,
         eval_cfg=commonsenseqa_eval_cfg,

View File

@@ -45,8 +45,9 @@ commonsenseqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
 commonsenseqa_datasets = [
     dict(
+        abbr='commonsense_qa',
         type=commonsenseqaDataset,
-        path='commonsense_qa',
+        path='./data/commonsenseqa',
         reader_cfg=commonsenseqa_reader_cfg,
         infer_cfg=commonsenseqa_infer_cfg,
         eval_cfg=commonsenseqa_eval_cfg)

View File

@@ -40,11 +40,10 @@ commonsenseqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
 commonsenseqa_datasets = [
     dict(
+        abbr='commonsense_qa',
         type=commonsenseqaDataset,
-        path='commonsense_qa',
+        path='./data/commonsenseqa',
         reader_cfg=commonsenseqa_reader_cfg,
         infer_cfg=commonsenseqa_infer_cfg,
         eval_cfg=commonsenseqa_eval_cfg)
 ]
-
-del _ice_template

View File

@@ -4,6 +4,11 @@ from opencompass.openicl.icl_inferencer import PPLInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import commonsenseqaDataset
 
+commonsenseqa_reader_cfg = dict(
+    input_columns=['question', 'A', 'B', 'C', 'D', 'E'],
+    output_column='answerKey',
+    test_split='validation')
+
 _ice_template = dict(
     type=PromptTemplate,
     template={

@@ -31,15 +36,10 @@ commonsenseqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
 commonsenseqa_datasets = [
     dict(
+        abbr='commonsense_qa',
         type=commonsenseqaDataset,
-        path='commonsense_qa',
-        reader_cfg=dict(
-            input_columns=['question', 'A', 'B', 'C', 'D', 'E'],
-            output_column='answerKey',
-            test_split='validation',
-        ),
+        path='./data/commonsenseqa',
+        reader_cfg=commonsenseqa_reader_cfg,
         infer_cfg=commonsenseqa_infer_cfg,
         eval_cfg=commonsenseqa_eval_cfg)
 ]
-
-del _ice_template

View File

@@ -4,11 +4,18 @@ from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import EMEvaluator
 from opencompass.datasets import dropDataset
 
+drop_reader_cfg = dict(
+    input_columns=['prompt', 'question'],
+    output_column='answers',
+    train_split='validation',
+    test_split='validation',
+)
+
 drop_infer_cfg = dict(
     prompt_template=dict(
         type=PromptTemplate,
-        template=
-        '''Text: In the county, the population was spread out with 23.50% under the age of 18, 8.70% from 18 to 24, 29.70% from 25 to 44, 24.70% from 45 to 64, and 13.30% who were 65 years of age or older.
+        template='''\
+Text: In the county, the population was spread out with 23.50% under the age of 18, 8.70% from 18 to 24, 29.70% from 25 to 44, 24.70% from 45 to 64, and 13.30% who were 65 years of age or older.
 Question: How many more percent are under the age of 18 compared to the 18 to 24 group?
 Anawer: According to the text, 23.5% are under the age of 18, and 8.7% are from ages 18 to 24. 23.5%-8.7%=14.8%. So the answer is 14.8.

@@ -30,13 +37,8 @@ drop_datasets = [
     dict(
         abbr='drop',
         type=dropDataset,
-        path='drop',
-        reader_cfg=dict(
-            input_columns=['prompt'],
-            output_column='answers',
-            train_split='validation',
-            test_split='validation',
-        ),
+        path='./data/drop/drop_dataset_dev.json',
+        reader_cfg=drop_reader_cfg,
         infer_cfg=drop_infer_cfg,
         eval_cfg=drop_eval_cfg)
 ]

View File

@@ -118,6 +118,12 @@ for _flores_subtask in _flores_subtasks:
     _, _flores_source, _src_inst, _ = flores_lang_map[_src]
     _, _flores_target, _tgt_inst, _ = flores_lang_map[_tgt]
 
+    flores_reader_cfg = dict(
+        input_columns=f"sentence_{_flores_source}",
+        output_column=f"sentence_{_flores_target}",
+        train_split="dev",
+        test_split="devtest"
+    )
+
     flores_infer_cfg = dict(
         ice_template=dict(
             type=PromptTemplate,

@@ -146,16 +152,11 @@ for _flores_subtask in _flores_subtasks:
         flores_eval_cfg["dataset_postprocessor"] = dict(type="flores")
     flores_datasets.append(
         dict(
-            type=FloresFirst100Dataset,
             abbr=f"flores_100_{_src}-{_tgt}",
+            type=FloresFirst100Dataset,
+            path='./data/flores_first100',
             name=f"{_flores_source}-{_flores_target}",
-            reader_cfg=dict(
-                input_columns=f"sentence_{_flores_source}",
-                output_column=f"sentence_{_flores_target}",
-                train_split="dev",
-                test_split="devtest"),
+            reader_cfg=flores_reader_cfg.copy(),
             infer_cfg=flores_infer_cfg.copy(),
             eval_cfg=flores_eval_cfg.copy(),
         ))
-
-del _flores_lang_map, _flores_subtask, _src, _tgt, _, _flores_source, _src_inst, _flores_target, _tgt_inst
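Note the switch to a shared `flores_reader_cfg` appended with `.copy()` (matching the existing `infer_cfg`/`eval_cfg` copies): the configs are built inside a loop over language pairs, so without per-iteration copies every appended dict would alias the same object and later mutations, such as setting `dataset_postprocessor`, would bleed across pairs. A minimal illustration of that hazard, independent of OpenCompass:

```python
# Why the configs are appended as .copy(): without copies, every list
# entry aliases one dict and the last mutation wins everywhere.
base = dict(dataset_postprocessor=None)

aliased = [base for _ in range(2)]             # the same dict, twice
aliased[0]['dataset_postprocessor'] = 'flores'
print(aliased[1]['dataset_postprocessor'])     # 'flores': the change leaked

base = dict(dataset_postprocessor=None)
independent = [base.copy() for _ in range(2)]  # one shallow copy per entry
independent[0]['dataset_postprocessor'] = 'flores'
print(independent[1]['dataset_postprocessor'])  # None: entries stay separate
```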

View File

@@ -118,6 +118,12 @@ for _flores_subtask in _flores_subtasks:
     _, _flores_source, _src_inst, _ = flores_lang_map[_src]
     _, _flores_target, _tgt_inst, _ = flores_lang_map[_tgt]
 
+    flores_reader_cfg = dict(
+        input_columns=f"sentence_{_flores_source}",
+        output_column=f"sentence_{_flores_target}",
+        train_split="dev",
+        test_split="devtest"
+    )
+
     flores_infer_cfg = dict(
         ice_template=dict(
             type=PromptTemplate,

@@ -139,16 +145,11 @@ for _flores_subtask in _flores_subtasks:
         flores_eval_cfg["dataset_postprocessor"] = dict(type="flores-chinese")
     flores_datasets.append(
         dict(
-            type=FloresFirst100Dataset,
             abbr=f"flores_100_{_src}-{_tgt}",
+            type=FloresFirst100Dataset,
+            path='./data/flores_first100',
            name=f"{_flores_source}-{_flores_target}",
-            reader_cfg=dict(
-                input_columns=f"sentence_{_flores_source}",
-                output_column=f"sentence_{_flores_target}",
-                train_split="dev",
-                test_split="devtest"),
+            reader_cfg=flores_reader_cfg.copy(),
             infer_cfg=flores_infer_cfg.copy(),
             eval_cfg=flores_eval_cfg.copy(),
         ))
-
-del _flores_lang_map, _flores_subtask, _src, _tgt, _, _flores_source, _src_inst, _flores_target, _tgt_inst

View File

@@ -1,8 +1,7 @@
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.openicl.icl_evaluator import AccEvaluator
-from opencompass.datasets import HFDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator
+from opencompass.datasets import GSM8KDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator
 
 gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')

@@ -32,9 +31,8 @@ gsm8k_eval_cfg = dict(evaluator=dict(type=Gsm8kEvaluator),
 gsm8k_datasets = [
     dict(
         abbr='gsm8k',
-        type=HFDataset,
-        path='gsm8k',
-        name='main',
+        type=GSM8KDataset,
+        path='./data/gsm8k',
         reader_cfg=gsm8k_reader_cfg,
         infer_cfg=gsm8k_infer_cfg,
         eval_cfg=gsm8k_eval_cfg)

View File

@@ -1,8 +1,7 @@
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.openicl.icl_evaluator import AccEvaluator
-from opencompass.datasets import HFDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator
+from opencompass.datasets import GSM8KDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator
 
 gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')

@@ -79,9 +78,8 @@ gsm8k_eval_cfg = dict(
 gsm8k_datasets = [
     dict(
         abbr='gsm8k',
-        type=HFDataset,
-        path='gsm8k',
-        name='main',
+        type=GSM8KDataset,
+        path='./data/gsm8k',
         reader_cfg=gsm8k_reader_cfg,
         infer_cfg=gsm8k_infer_cfg,
         eval_cfg=gsm8k_eval_cfg)

View File

@@ -1,7 +1,7 @@
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import AgentInferencer
-from opencompass.datasets import HFDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kAgentEvaluator
+from opencompass.datasets import GSM8KDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kAgentEvaluator
 
 # This config is for code interpreter
 gsm8k_example = """

@@ -76,9 +76,8 @@ gsm8k_eval_cfg = dict(
 gsm8k_datasets = [
     dict(
         abbr='gsm8k',
-        type=HFDataset,
-        path='gsm8k',
-        name='main',
+        type=GSM8KDataset,
+        path='./data/gsm8k',
         reader_cfg=gsm8k_reader_cfg,
         infer_cfg=gsm8k_infer_cfg,
         eval_cfg=gsm8k_eval_cfg)

View File

@@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import SCInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
-from opencompass.datasets import HFDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator
+from opencompass.datasets import GSM8KDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator
 
 gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer' )
 generation_kwargs = dict(do_sample=True, temperature=0.7, top_k=40)

@@ -81,9 +81,8 @@ gsm8k_eval_cfg = dict(
 gsm8k_datasets = [
     dict(
         abbr='gsm8k',
-        type=HFDataset,
-        path='gsm8k',
-        name='main',
+        type=GSM8KDataset,
+        path='./data/gsm8k',
         reader_cfg=gsm8k_reader_cfg,
         infer_cfg=gsm8k_infer_cfg,
         eval_cfg=gsm8k_eval_cfg)

View File

@@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
-from opencompass.datasets import HFDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator
+from opencompass.datasets import GSM8KDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator
 
 gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')

@@ -41,9 +41,9 @@ gsm8k_eval_cfg = dict(evaluator=dict(type=Gsm8kEvaluator),
 gsm8k_datasets = [
     dict(
-        type=HFDataset,
-        path='gsm8k',
-        name='main',
+        abbr='gsm8k',
+        type=GSM8KDataset,
+        path='./data/gsm8k',
         reader_cfg=gsm8k_reader_cfg,
         infer_cfg=gsm8k_infer_cfg,
         eval_cfg=gsm8k_eval_cfg)

View File

@@ -8,7 +8,7 @@ from opencompass.utils.text_postprocessors import first_option_postprocess
 hellaswag_reader_cfg = dict(
     input_columns=["ctx", "A", "B", "C", "D"],
     output_column="label",
-    test_split="validation")
+)
 
 hellaswag_infer_cfg = dict(
     prompt_template=dict(

@@ -35,8 +35,9 @@ hellaswag_eval_cfg = dict(
 hellaswag_datasets = [
     dict(
+        abbr='hellaswag',
         type=hellaswagDataset_V2,
-        path="hellaswag",
+        path='./data/hellaswag/hellaswag.jsonl',
         reader_cfg=hellaswag_reader_cfg,
         infer_cfg=hellaswag_infer_cfg,
         eval_cfg=hellaswag_eval_cfg)

View File

@@ -27,8 +27,9 @@ hellaswag_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
 hellaswag_datasets = [
     dict(
+        abbr='hellaswag',
         type=hellaswagDataset,
-        path='hellaswag',
+        path='./data/hellaswag/hellaswag.jsonl',
         reader_cfg=hellaswag_reader_cfg,
         infer_cfg=hellaswag_infer_cfg,
         eval_cfg=hellaswag_eval_cfg)

View File

@@ -6,9 +6,8 @@ from opencompass.datasets import hellaswagDataset
 hellaswag_reader_cfg = dict(
     input_columns=['ctx', 'A', 'B', 'C', 'D'],
-    output_column='label',
-    train_split='validation',
-    test_split='validation')
+    output_column='label'
+)
 
 hellaswag_infer_cfg = dict(
     prompt_template=dict(

@@ -26,8 +25,9 @@ hellaswag_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
 hellaswag_datasets = [
     dict(
+        abbr='hellaswag',
         type=hellaswagDataset,
-        path='hellaswag',
+        path='./data/hellaswag/hellaswag.jsonl',
         reader_cfg=hellaswag_reader_cfg,
         infer_cfg=hellaswag_infer_cfg,
         eval_cfg=hellaswag_eval_cfg)

View File

@@ -1,7 +1,7 @@
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess
+from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess
 
 humaneval_reader_cfg = dict(
     input_columns=['prompt'], output_column='task_id', train_split='test')

@@ -32,8 +32,9 @@ humaneval_eval_cfg = dict(
 humaneval_datasets = [
     dict(
-        type=HFDataset,
-        path='openai_humaneval',
+        abbr='openai_humaneval',
+        type=HumanevalDataset,
+        path='./data/humaneval/human-eval-v2-20210705.jsonl',
         reader_cfg=humaneval_reader_cfg,
         infer_cfg=humaneval_infer_cfg,
         eval_cfg=humaneval_eval_cfg)

View File

@@ -1,7 +1,7 @@
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess
+from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess
 
 humaneval_reader_cfg = dict(
     input_columns=['prompt'], output_column='task_id', train_split='test')

@@ -27,8 +27,9 @@ humaneval_eval_cfg = dict(
 humaneval_datasets = [
     dict(
-        type=HFDataset,
-        path='openai_humaneval',
+        abbr='openai_humaneval',
+        type=HumanevalDataset,
+        path='./data/humaneval/human-eval-v2-20210705.jsonl',
         reader_cfg=humaneval_reader_cfg,
         infer_cfg=humaneval_infer_cfg,
         eval_cfg=humaneval_eval_cfg)

View File

@@ -1,7 +1,7 @@
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess
+from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess
 
 humaneval_reader_cfg = dict(
     input_columns=['prompt'], output_column='task_id', train_split='test')

@@ -27,8 +27,9 @@ humaneval_eval_cfg = dict(
 humaneval_datasets = [
     dict(
-        type=HFDataset,
-        path='openai_humaneval',
+        abbr='openai_humaneval',
+        type=HumanevalDataset,
+        path='./data/humaneval/human-eval-v2-20210705.jsonl',
         reader_cfg=humaneval_reader_cfg,
         infer_cfg=humaneval_infer_cfg,
         eval_cfg=humaneval_eval_cfg)

View File

@@ -1,7 +1,7 @@
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess
+from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess
 
 humaneval_reader_cfg = dict(
     input_columns=['prompt'], output_column='task_id', train_split='test')

@@ -22,8 +22,9 @@ humaneval_eval_cfg = dict(
 humaneval_datasets = [
     dict(
-        type=HFDataset,
-        path='openai_humaneval',
+        abbr='openai_humaneval',
+        type=HumanevalDataset,
+        path='./data/humaneval/human-eval-v2-20210705.jsonl',
         reader_cfg=humaneval_reader_cfg,
         infer_cfg=humaneval_infer_cfg,
         eval_cfg=humaneval_eval_cfg)

View File

@@ -1,7 +1,7 @@
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess
+from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess
 
 humaneval_reader_cfg = dict(
     input_columns=['prompt'], output_column='task_id', train_split='test')

@@ -32,8 +32,9 @@ humaneval_eval_cfg = dict(
 humaneval_datasets = [
     dict(
-        type=HFDataset,
-        path='openai_humaneval',
+        abbr='openai_humaneval',
+        type=HumanevalDataset,
+        path='./data/humaneval/human-eval-v2-20210705.jsonl',
         reader_cfg=humaneval_reader_cfg,
         infer_cfg=humaneval_infer_cfg,
         eval_cfg=humaneval_eval_cfg)

View File

@@ -26,7 +26,7 @@ lambada_datasets = [
     dict(
         abbr='lambada',
         type=lambadaDataset,
-        path='craffel/openai_lambada',
+        path='./data/lambada/test.jsonl',
         reader_cfg=lambada_reader_cfg,
         infer_cfg=lambada_infer_cfg,
         eval_cfg=lambada_eval_cfg)

View File

@@ -22,7 +22,7 @@ lambada_datasets = [
     dict(
         abbr='lambada',
         type=lambadaDataset,
-        path='craffel/openai_lambada',
+        path='./data/lambada/test.jsonl',
         reader_cfg=lambada_reader_cfg,
         infer_cfg=lambada_infer_cfg,
         eval_cfg=lambada_eval_cfg)

View File

@@ -32,15 +32,12 @@ obqa_datasets = [
     dict(
         abbr="openbookqa",
         type=OBQADataset,
-        path="openbookqa",
-        split="test",
+        path='./data/openbookqa/Main/test.jsonl',
     ),
     dict(
         abbr="openbookqa_fact",
         type=OBQADataset,
-        path="openbookqa",
-        name="additional",
-        split="test",
+        path='./data/openbookqa/Additional/test_complete.jsonl',
     ),
 ]

View File

@@ -24,15 +24,12 @@ obqa_datasets = [
     dict(
         abbr="openbookqa",
         type=OBQADataset,
-        path="openbookqa",
-        split="test",
+        path='./data/openbookqa/Main/test.jsonl',
     ),
     dict(
         abbr="openbookqa_fact",
         type=OBQADataset,
-        path="openbookqa",
-        name="additional",
-        split="test",
+        path='./data/openbookqa/Additional/test_complete.jsonl',
     ),
 ]
 for _i in range(2):

View File

@@ -33,9 +33,7 @@ obqa_datasets = [
     dict(
         abbr='openbookqa_fact',
         type=OBQADataset_V2,
-        path='openbookqa',
-        name='additional',
-        split='test',
+        path='./data/openbookqa/Additional/test_complete.jsonl',
         reader_cfg=obqa_reader_cfg,
         infer_cfg=obqa_infer_cfg,
         eval_cfg=obqa_eval_cfg,

View File

@@ -37,16 +37,14 @@ _template = [
 obqa_datasets = [
     dict(
+        abbr="openbookqa",
         type=OBQADataset,
-        path='openbookqa',
-        split='test',
+        path='./data/openbookqa/Main/test.jsonl',
     ),
     dict(
         abbr='openbookqa_fact',
         type=OBQADataset,
-        path='openbookqa',
-        name='additional',
-        split='test',
+        path='./data/openbookqa/Additional/test_complete.jsonl',
     ),
 ]
 for _i in range(2):

View File

@@ -34,7 +34,7 @@ piqa_datasets = [
     dict(
         abbr="piqa",
         type=piqaDataset_V2,
-        path="piqa",
+        path='./data/piqa',
         reader_cfg=piqa_reader_cfg,
         infer_cfg=piqa_infer_cfg,
         eval_cfg=piqa_eval_cfg)

View File

@@ -30,7 +30,7 @@ piqa_datasets = [
     dict(
         abbr='piqa',
         type=piqaDataset_V3,
-        path='piqa',
+        path='./data/piqa',
         reader_cfg=piqa_reader_cfg,
         infer_cfg=piqa_infer_cfg,
         eval_cfg=piqa_eval_cfg)

View File

@@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import PPLInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
-from opencompass.datasets import HFDataset
+from opencompass.datasets import piqaDataset
 
 piqa_reader_cfg = dict(
     input_columns=['goal', 'sol1', 'sol2'],

@@ -23,8 +23,9 @@ piqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
 piqa_datasets = [
     dict(
-        type=HFDataset,
-        path='piqa',
+        abbr='piqa',
+        type=piqaDataset,
+        path='./data/piqa',
         reader_cfg=piqa_reader_cfg,
         infer_cfg=piqa_infer_cfg,
         eval_cfg=piqa_eval_cfg)

View File

@@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import PPLInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
-from opencompass.datasets import HFDataset
+from opencompass.datasets import piqaDataset
 
 piqa_reader_cfg = dict(
     input_columns=['goal', 'sol1', 'sol2'],

@@ -33,8 +33,9 @@ piqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
 piqa_datasets = [
     dict(
-        type=HFDataset,
-        path='piqa',
+        abbr='piqa',
+        type=piqaDataset,
+        path='./data/piqa',
         reader_cfg=piqa_reader_cfg,
         infer_cfg=piqa_infer_cfg,
         eval_cfg=piqa_eval_cfg)

View File

@@ -7,7 +7,10 @@ from opencompass.utils.text_postprocessors import first_option_postprocess
 race_reader_cfg = dict(
     input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
-    output_column='answer')
+    output_column='answer',
+    train_split="validation",
+    test_split="test"
+)
 
 race_infer_cfg = dict(
     prompt_template=dict(

@@ -29,17 +32,17 @@ race_eval_cfg = dict(
 race_datasets = [
     dict(
-        type=RaceDataset,
         abbr='race-middle',
-        path='race',
+        type=RaceDataset,
+        path='./data/race',
         name='middle',
         reader_cfg=race_reader_cfg,
         infer_cfg=race_infer_cfg,
         eval_cfg=race_eval_cfg),
     dict(
-        type=RaceDataset,
         abbr='race-high',
-        path='race',
+        type=RaceDataset,
+        path='./data/race',
         name='high',
         reader_cfg=race_reader_cfg,
         infer_cfg=race_infer_cfg,

View File

@@ -7,7 +7,10 @@ from opencompass.utils.text_postprocessors import first_capital_postprocess
 race_reader_cfg = dict(
     input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
-    output_column='answer')
+    output_column='answer',
+    train_split="validation",
+    test_split="test"
+)
 
 race_infer_cfg = dict(
     prompt_template=dict(

@@ -23,17 +26,17 @@ race_eval_cfg = dict(
 race_datasets = [
     dict(
-        type=RaceDataset,
         abbr='race-middle',
-        path='race',
+        type=RaceDataset,
+        path='./data/race',
         name='middle',
         reader_cfg=race_reader_cfg,
         infer_cfg=race_infer_cfg,
         eval_cfg=race_eval_cfg),
     dict(
-        type=RaceDataset,
         abbr='race-high',
-        path='race',
+        type=RaceDataset,
+        path='./data/race',
         name='high',
         reader_cfg=race_reader_cfg,
         infer_cfg=race_infer_cfg,

View File

@@ -6,7 +6,10 @@ from opencompass.datasets import RaceDataset
 race_reader_cfg = dict(
     input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
-    output_column='answer')
+    output_column='answer',
+    train_split="validation",
+    test_split="test"
+)
 
 race_infer_cfg = dict(
     prompt_template=dict(

@@ -27,17 +30,17 @@ race_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
 race_datasets = [
     dict(
-        type=RaceDataset,
         abbr='race-middle',
-        path='race',
+        type=RaceDataset,
+        path='./data/race',
         name='middle',
         reader_cfg=race_reader_cfg,
         infer_cfg=race_infer_cfg,
         eval_cfg=race_eval_cfg),
     dict(
-        type=RaceDataset,
         abbr='race-high',
-        path='race',
+        type=RaceDataset,
+        path='./data/race',
         name='high',
         reader_cfg=race_reader_cfg,
         infer_cfg=race_infer_cfg,

View File

@@ -6,7 +6,10 @@ from opencompass.datasets import RaceDataset
 race_reader_cfg = dict(
     input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
-    output_column='answer')
+    output_column='answer',
+    train_split="validation",
+    test_split="test"
+)
 
 race_infer_cfg = dict(
     prompt_template=dict(

@@ -29,17 +32,17 @@ race_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
 race_datasets = [
     dict(
-        type=RaceDataset,
         abbr='race-middle',
-        path='race',
+        type=RaceDataset,
+        path='./data/race',
         name='middle',
         reader_cfg=race_reader_cfg,
         infer_cfg=race_infer_cfg,
         eval_cfg=race_eval_cfg),
     dict(
-        type=RaceDataset,
         abbr='race-high',
-        path='race',
+        type=RaceDataset,
+        path='./data/race',
         name='high',
         reader_cfg=race_reader_cfg,
         infer_cfg=race_infer_cfg,

View File

@@ -6,7 +6,10 @@ from opencompass.datasets import RaceDataset
 race_reader_cfg = dict(
     input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
-    output_column='answer')
+    output_column='answer',
+    train_split="validation",
+    test_split="test"
+)
 
 race_infer_cfg = dict(
     prompt_template=dict(

@@ -28,17 +31,17 @@ race_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
 race_datasets = [
     dict(
-        type=RaceDataset,
         abbr='race-middle',
-        path='race',
+        type=RaceDataset,
+        path='./data/race',
         name='middle',
         reader_cfg=race_reader_cfg,
         infer_cfg=race_infer_cfg,
         eval_cfg=race_eval_cfg),
     dict(
-        type=RaceDataset,
         abbr='race-high',
-        path='race',
+        type=RaceDataset,
+        path='./data/race',
         name='high',
         reader_cfg=race_reader_cfg,
         infer_cfg=race_infer_cfg,

View File

@@ -34,7 +34,7 @@ siqa_datasets = [
     dict(
         abbr="siqa",
         type=siqaDataset_V2,
-        path="social_i_qa",
+        path='./data/siqa',
         reader_cfg=siqa_reader_cfg,
         infer_cfg=siqa_infer_cfg,
         eval_cfg=siqa_eval_cfg)

View File

@@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import PPLInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
-from opencompass.datasets import HFDataset
+from opencompass.datasets import siqaDataset
 
 siqa_reader_cfg = dict(
     input_columns=['context', 'question', 'answerA', 'answerB', 'answerC'],

@@ -25,8 +25,8 @@ siqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
 siqa_datasets = [
     dict(
         abbr="siqa",
-        type=HFDataset,
-        path='social_i_qa',
+        type=siqaDataset,
+        path='./data/siqa',
         reader_cfg=siqa_reader_cfg,
         infer_cfg=siqa_infer_cfg,
         eval_cfg=siqa_eval_cfg)

View File

@@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import PPLInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
-from opencompass.datasets import HFDataset
+from opencompass.datasets import siqaDataset
 
 siqa_reader_cfg = dict(
     input_columns=['context', 'question', 'answerA', 'answerB', 'answerC'],

@@ -25,9 +25,8 @@ siqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
 siqa_datasets = [
     dict(
         abbr="siqa",
-        type=HFDataset,
-        path='social_i_qa',
-        name='social_i_qa',
+        type=siqaDataset,
+        path='./data/siqa',
         reader_cfg=siqa_reader_cfg,
         infer_cfg=siqa_infer_cfg,
         eval_cfg=siqa_eval_cfg)

View File

@@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import PPLInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
-from opencompass.datasets import HFDataset
+from opencompass.datasets import siqaDataset
 
 siqa_reader_cfg = dict(
     input_columns=['context', 'question', 'answerA', 'answerB', 'answerC'],

@@ -37,8 +37,8 @@ siqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
 siqa_datasets = [
     dict(
         abbr="siqa",
-        type=HFDataset,
-        path='social_i_qa',
+        type=siqaDataset,
+        path='./data/siqa',
         reader_cfg=siqa_reader_cfg,
         infer_cfg=siqa_infer_cfg,
         eval_cfg=siqa_eval_cfg)

View File

@@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import PPLInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
-from opencompass.datasets import HFDataset
+from opencompass.datasets import siqaDataset
 
 siqa_reader_cfg = dict(
     input_columns=['context', 'question', 'answerA', 'answerB', 'answerC'],

@@ -37,8 +37,8 @@ siqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
 siqa_datasets = [
     dict(
         abbr="siqa",
-        type=HFDataset,
-        path='social_i_qa',
+        type=siqaDataset,
+        path='./data/siqa',
         reader_cfg=siqa_reader_cfg,
         infer_cfg=siqa_infer_cfg,
         eval_cfg=siqa_eval_cfg)

View File

@@ -37,8 +37,8 @@ storycloze_datasets = [
     dict(
         abbr="story_cloze",
         type=storyclozeDataset_V2,
-        path="juletxara/xstory_cloze",
-        name="en",
+        path='./data/xstory_cloze',
+        lang='en',
         reader_cfg=storycloze_reader_cfg,
         infer_cfg=storycloze_infer_cfg,
         eval_cfg=storycloze_eval_cfg,

View File

@@ -31,8 +31,8 @@ storycloze_datasets = [
     dict(
         abbr='story_cloze',
         type=storyclozeDataset,
-        path='juletxara/xstory_cloze',
-        name='en',
+        path='./data/xstory_cloze',
+        lang='en',
         reader_cfg=storycloze_reader_cfg,
         infer_cfg=storycloze_infer_cfg,
         eval_cfg=storycloze_eval_cfg)

View File

@@ -28,8 +28,8 @@ storycloze_datasets = [
     dict(
         abbr='story_cloze',
         type=storyclozeDataset,
-        path='juletxara/xstory_cloze',
-        name='en',
+        path='./data/xstory_cloze',
+        lang='en',
         reader_cfg=storycloze_reader_cfg,
         infer_cfg=storycloze_infer_cfg,
         eval_cfg=storycloze_eval_cfg)

View File

@@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
-from opencompass.datasets import HFDataset, strategyqa_pred_postprocess, strategyqa_dataset_postprocess
+from opencompass.datasets import StrategyQADataset, strategyqa_pred_postprocess, strategyqa_dataset_postprocess
 
 strategyqa_reader_cfg = dict(
     input_columns=['question'],

@@ -86,8 +86,8 @@ strategyqa_eval_cfg = dict(
 strategyqa_datasets = [
     dict(
         abbr='strategyqa',
-        type=HFDataset,
-        path='wics/strategy-qa',
+        type=StrategyQADataset,
+        path='./data/strategyqa/strategyQA_train.json',
         reader_cfg=strategyqa_reader_cfg,
         infer_cfg=strategyqa_infer_cfg,
         eval_cfg=strategyqa_eval_cfg)

View File

@@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
-from opencompass.datasets import HFDataset, strategyqa_pred_postprocess, strategyqa_dataset_postprocess
+from opencompass.datasets import StrategyQADataset, strategyqa_pred_postprocess, strategyqa_dataset_postprocess
 
 strategyqa_reader_cfg = dict(
     input_columns=['question'],

@@ -50,8 +50,8 @@ strategyqa_eval_cfg = dict(
 strategyqa_datasets = [
     dict(
         abbr='strategyqa',
-        type=HFDataset,
-        path='wics/strategy-qa',
+        type=StrategyQADataset,
+        path='./data/strategyqa/strategyQA_train.json',
         reader_cfg=strategyqa_reader_cfg,
         infer_cfg=strategyqa_infer_cfg,
         eval_cfg=strategyqa_eval_cfg)

View File

@@ -6,9 +6,8 @@ from opencompass.datasets import TydiQADataset, TydiQAEvaluator
 # All configs are for TydiQA Goldp task
 tydiqa_reader_cfg = dict(
     input_columns=["passage_text", "question_text"],
-    output_column="answer",
-    test_split='validation',
-    train_split='validation',)
+    output_column="answer"
+)
 
 langs = ['arabic', 'bengali', 'english', 'finnish', 'indonesian', 'japanese', 'korean', 'russian', 'swahili', 'telugu', 'thai']

@@ -33,19 +32,25 @@ for _lang in langs:
         prompt_template=dict(
             type=PromptTemplate,
             template=f"{_hint[0]}\n\n</E>{_hint[1]}{{passage_text}}\n{_hint[2]} {{question_text}}\n{_hint[3]} {{answer}}",
-            ice_token='</E>'),
+            ice_token='</E>'
+        ),
         retriever=dict(type=ZeroRetriever),
-        inferencer=dict(type=GenInferencer), max_out_len=50)
+        inferencer=dict(type=GenInferencer), max_out_len=50
+    )
 
-    tydiqa_eval_cfg = dict(evaluator=dict(type=TydiQAEvaluator),
-                           ds_split='validation',
-                           ds_column='answer',
-                           )
+    tydiqa_eval_cfg = dict(
+        evaluator=dict(type=TydiQAEvaluator),
+        ds_split='validation',
+        ds_column='answer',
+    )
 
     tydiqa_datasets.append(
         dict(abbr=f'tyidqa-goldp_{_lang}',
              type=TydiQADataset,
-             path='khalidalt/tydiqa-goldp',
-             name=_lang,
+             path='./data/tydiqa',
+             lang=_lang,
             reader_cfg=tydiqa_reader_cfg,
             infer_cfg=tydiqa_infer_cfg,
-             eval_cfg=tydiqa_eval_cfg))
+             eval_cfg=tydiqa_eval_cfg
+        )
+    )

View File

@@ -7,8 +7,8 @@ from opencompass.utils.text_postprocessors import first_option_postprocess
 winogrande_reader_cfg = dict(
     input_columns=["opt1", "opt2"],
-    output_column="label",
-    test_split="validation")
+    output_column="answer",
+)
 
 winogrande_infer_cfg = dict(
     prompt_template=dict(

@@ -35,8 +35,7 @@ winogrande_datasets = [
     dict(
         abbr="winogrande",
         type=winograndeDataset_V2,
-        path="winogrande",
-        name="winogrande_xs",
+        path='./data/winogrande',
         reader_cfg=winogrande_reader_cfg,
         infer_cfg=winogrande_infer_cfg,
         eval_cfg=winogrande_eval_cfg,

View File

@@ -7,8 +7,7 @@ from opencompass.datasets import winograndeDataset
 winogrande_reader_cfg = dict(
     input_columns=['opt1', 'opt2'],
     output_column='answer',
-    train_split='validation',
-    test_split='validation')
+)
 
 winogrande_infer_cfg = dict(
     prompt_template=dict(

@@ -28,8 +27,7 @@ winogrande_datasets = [
     dict(
         abbr='winogrande',
         type=winograndeDataset,
-        path='winogrande',
-        name='winogrande_xs',
+        path='./data/winogrande',
         reader_cfg=winogrande_reader_cfg,
         infer_cfg=winogrande_infer_cfg,
         eval_cfg=winogrande_eval_cfg)

View File

@@ -7,8 +7,7 @@ from opencompass.datasets import winograndeDataset
 winogrande_reader_cfg = dict(
     input_columns=['opt1', 'opt2'],
     output_column='answer',
-    train_split='validation',
-    test_split='validation')
+)
 
 winogrande_infer_cfg = dict(
     prompt_template=dict(

@@ -26,8 +25,7 @@ winogrande_datasets = [
     dict(
         abbr='winogrande',
         type=winograndeDataset,
-        path='winogrande',
-        name='winogrande_xs',
+        path='./data/winogrande',
         reader_cfg=winogrande_reader_cfg,
         infer_cfg=winogrande_infer_cfg,
         eval_cfg=winogrande_eval_cfg)

View File

@@ -87,17 +87,6 @@ summarizer = dict(
         'eprstmt-dev',
         'lambada',
         'tnews-dev',
-        '--------- 安全 Safety ---------',  # category
-        # '偏见',  # subcategory
-        'crows_pairs',
-        # '有毒性(判别)',  # subcategory
-        'civil_comments',
-        # '有毒性(判别)多语言',  # subcategory
-        'jigsaw_multilingual',
-        # '有毒性(生成)',  # subcategory
-        'real-toxicity-prompts',
-        # '真实性/有用性',  # subcategory
-        'truthful_qa',
     ],
     summary_groups=sum(
         [v for k, v in locals().items() if k.endswith("_summary_groups")], []),

View File

@@ -56,8 +56,6 @@ summarizer = dict(
         'openbookqa_fact',
         'nq',
         'triviaqa',
-        '--- Security ---',
-        'crows_pairs',
     ],
     summary_groups=sum([v for k, v in locals().items() if k.endswith("_summary_groups")], []),
 )

View File

@@ -66,10 +66,21 @@ Run the following commands to download and place the datasets in the `${OpenCompass}/data` directory:
 ```bash
 # Run in the OpenCompass directory
-wget https://github.com/open-compass/opencompass/releases/download/0.1.1/OpenCompassData.zip
-unzip OpenCompassData.zip
+wget https://github.com/open-compass/opencompass/releases/download/0.1.8.rc1/OpenCompassData-core-20231110.zip
+unzip OpenCompassData-core-20231110.zip
 ```
 
+If you need the more comprehensive dataset pack (~500M) provided by OpenCompass, you can download it with the following commands:
+
+```bash
+wget https://github.com/open-compass/opencompass/releases/download/0.1.8.rc1/OpenCompassData-complete-20231110.zip
+unzip OpenCompassData-complete-20231110.zip
+cd ./data
+unzip *.zip
+```
+
+The list of datasets included in both `.zip` files can be found [here](https://github.com/open-compass/opencompass/releases/tag/0.1.8.rc1).
+
 OpenCompass has supported most of the datasets commonly used for performance comparison; please refer to `configs/datasets` for the specific list of supported datasets.
 
 For the next step, please read [Quick Start](./quick_start.md).
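Since the complete pack leaves one zip per dataset inside `./data`, the extra `cd ./data && unzip *.zip` step matters. A small Python sketch of that second stage, assuming the outer archive has already been extracted at the repository root:

```python
import glob
import zipfile

# Python counterpart of `cd ./data && unzip *.zip`: unpack each
# per-dataset archive left behind by OpenCompassData-complete-*.zip.
for inner in glob.glob("data/*.zip"):
    with zipfile.ZipFile(inner) as zf:
        zf.extractall("data")
```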

View File

@@ -66,10 +66,21 @@ The datasets supported by OpenCompass mainly consist of two parts:
 Run the following commands in the OpenCompass project root to place the datasets under `${OpenCompass}/data`:
 
 ```bash
-wget https://github.com/open-compass/opencompass/releases/download/0.1.1/OpenCompassData.zip
-unzip OpenCompassData.zip
+wget https://github.com/open-compass/opencompass/releases/download/0.1.8.rc1/OpenCompassData-core-20231110.zip
+unzip OpenCompassData-core-20231110.zip
 ```
 
+If you need the more complete dataset pack (~500M) provided by OpenCompass, you can download it with the following commands:
+
+```bash
+wget https://github.com/open-compass/opencompass/releases/download/0.1.8.rc1/OpenCompassData-complete-20231110.zip
+unzip OpenCompassData-complete-20231110.zip
+cd ./data
+unzip *.zip
+```
+
+The list of datasets contained in both `.zip` files is shown [here](https://github.com/open-compass/opencompass/releases/tag/0.1.8.rc1).
+
 OpenCompass already supports most datasets commonly used for performance comparison; for the specific list of supported datasets, please look under `configs/datasets`.
 
 Next, you can read [Quick Start](./quick_start.md) to learn the basic usage of OpenCompass.

View File

@@ -1,4 +1,7 @@
-from datasets import load_dataset
+import json
+import os
+
+from datasets import Dataset, DatasetDict
 
 from opencompass.registry import LOAD_DATASET

@@ -9,14 +12,33 @@ from .base import BaseDataset
 class commonsenseqaDataset(BaseDataset):
 
     @staticmethod
-    def load(**kwargs):
-        dataset = load_dataset(**kwargs)
-
-        def pre_process(example):
-            for i in range(5):
-                example[chr(ord('A') + i)] = example['choices']['text'][i]
-            return example
-
-        dataset = dataset.map(pre_process).remove_columns(
-            ['question_concept', 'id', 'choices'])
-        return dataset
+    def load(path):
+        dataset = {}
+        for split, stub in [
+                ['train', 'train_rand_split.jsonl'],
+                ['validation', 'dev_rand_split.jsonl'],
+        ]:
+            data_path = os.path.join(path, stub)
+            dataset_list = []
+            with open(data_path, 'r', encoding='utf-8') as f:
+                for line in f:
+                    line = json.loads(line)
+                    dataset_list.append({
+                        'question': line['question']['stem'],
+                        'A': line['question']['choices'][0]['text'],
+                        'B': line['question']['choices'][1]['text'],
+                        'C': line['question']['choices'][2]['text'],
+                        'D': line['question']['choices'][3]['text'],
+                        'E': line['question']['choices'][4]['text'],
+                        'answerKey': line['answerKey'],
+                    })
+            dataset[split] = Dataset.from_list(dataset_list)
+
+        return DatasetDict(dataset)
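A quick way to smoke-test the new loader, once this commit is applied, is to point it at a directory holding the two raw CommonsenseQA files. The snippet below fabricates a one-row fixture; the file names and JSON schema are exactly those read by `load` above, everything else is illustrative:

```python
import json
import os
import tempfile

from opencompass.datasets import commonsenseqaDataset

# One fabricated record in the raw CommonsenseQA schema.
sample = {
    'answerKey': 'A',
    'question': {
        'stem': 'Where would you put a plant to catch sunlight?',
        'choices': [{'text': t} for t in
                    ['windowsill', 'cellar', 'drawer', 'closet', 'box']],
    },
}

root = tempfile.mkdtemp()
for stub in ['train_rand_split.jsonl', 'dev_rand_split.jsonl']:
    with open(os.path.join(root, stub), 'w', encoding='utf-8') as f:
        f.write(json.dumps(sample) + '\n')

ds = commonsenseqaDataset.load(path=root)
print(ds['validation'][0]['A'], ds['validation'][0]['answerKey'])
# -> windowsill A
```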

View File

@@ -1,4 +1,6 @@
-from datasets import DatasetDict, load_dataset
+import json
+
+from datasets import Dataset, DatasetDict
 
 from opencompass.registry import LOAD_DATASET

@@ -9,21 +11,37 @@ from .base import BaseDataset
 class dropDataset(BaseDataset):
 
     @staticmethod
-    def load(**kwargs):
-        dataset = load_dataset(**kwargs, split='validation')
-
-        def pre_process(example):
-            example['answers'] = example['answers_spans']['spans']
-            example['prompt'] = example.pop('passage')
-            return example
-
-        def only_number(example):
-            for i in example['answers_spans']['types']:
-                if i == 'number':
-                    return True
-            return False
-
-        dataset = dataset.filter(only_number)
-        dataset = dataset.map(pre_process).remove_columns(
-            ['section_id', 'query_id'])
-        return DatasetDict({'validation': dataset})
+    def get_answers(validated_answers):
+        answers = []
+        for answer_item in validated_answers:
+            if answer_item['number']:
+                answers.append(answer_item['number'])
+            elif any(answer_item['date'][i] for i in ['day', 'month', 'year']):
+                d = [answer_item['date'][i] for i in ['day', 'month', 'year']]
+                answers.append(' '.join(d).strip())
+            else:
+                for span in answer_item['spans']:
+                    answers.append(span)
+        answers = list(set(answers))
+        return answers
+
+    @staticmethod
+    def load(path, only_number=True):
+        with open(path, 'r', encoding='utf-8') as f:
+            lines = json.load(f)
+        dataset_list = []
+        for line in lines.values():
+            for qa_pair in line['qa_pairs']:
+                validated_answers = qa_pair['validated_answers']
+                if only_number and not any(i['number']
+                                           for i in validated_answers):
+                    continue
+                item = {
+                    'prompt': line['passage'],
+                    'question': qa_pair['question'],
+                    'answers': dropDataset.get_answers(validated_answers),
+                }
+                dataset_list.append(item)
+
+        dataset_list = Dataset.from_list(dataset_list)
+        return DatasetDict({'validation': dataset_list})

View File

@ -1,6 +1,7 @@
import os
import re import re
from datasets import DatasetDict, load_dataset from datasets import Dataset, DatasetDict
from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS
@ -11,15 +12,30 @@ from .base import BaseDataset
class FloresFirst100Dataset(BaseDataset): class FloresFirst100Dataset(BaseDataset):
@staticmethod @staticmethod
def load(name): def load_single(src_path, tgt_path, src_lang, tgt_lang):
return DatasetDict({
'dev': with open(src_path, 'r', encoding='utf-8') as f:
load_dataset(path='facebook/flores', name=name, split='dev'), src_lines = f.readlines()
'devtest': with open(tgt_path, 'r', encoding='utf-8') as f:
load_dataset(path='facebook/flores', tgt_lines = f.readlines()
name=name, assert len(src_lines) == len(tgt_lines)
split='devtest[:100]') dataset_list = [{
}) f'sentence_{src_lang}': src_lines[i].strip(),
f'sentence_{tgt_lang}': tgt_lines[i].strip(),
} for i in range(len(src_lines))]
return Dataset.from_list(dataset_list)
@staticmethod
def load(path, name):
src_lang, tgt_lang = name.split('-')
dev_dataset = FloresFirst100Dataset.load_single(
os.path.join(path, 'dev', f'{src_lang}.dev'),
os.path.join(path, 'dev', f'{tgt_lang}.dev'), src_lang, tgt_lang)
devtest_dataset = FloresFirst100Dataset.load_single(
os.path.join(path, 'devtest', f'{src_lang}.devtest'),
os.path.join(path, 'devtest', f'{tgt_lang}.devtest'), src_lang,
tgt_lang)
return DatasetDict({'dev': dev_dataset, 'devtest': devtest_dataset})
@TEXT_POSTPROCESSORS.register_module('flores') @TEXT_POSTPROCESSORS.register_module('flores')

View File

@ -1,5 +1,30 @@
import json
import os
from datasets import Dataset, DatasetDict
from opencompass.openicl import BaseEvaluator from opencompass.openicl import BaseEvaluator
from opencompass.registry import TEXT_POSTPROCESSORS from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS
from .base import BaseDataset
@LOAD_DATASET.register_module()
class GSM8KDataset(BaseDataset):
@staticmethod
def load(path):
datasets = {}
for split in ['train', 'test']:
split_path = os.path.join(path, split + '.jsonl')
dataset = []
with open(split_path, 'r', encoding='utf-8') as f:
for line in f:
line = json.loads(line.strip())
line['answer']
dataset.append(line)
datasets[split] = Dataset.from_list(dataset)
return DatasetDict(datasets)
@TEXT_POSTPROCESSORS.register_module('gsm8k_dataset') @TEXT_POSTPROCESSORS.register_module('gsm8k_dataset')

View File

@ -1,6 +1,6 @@
import json import json
from datasets import Dataset, load_dataset from datasets import Dataset
from opencompass.registry import LOAD_DATASET from opencompass.registry import LOAD_DATASET
@ -11,15 +11,20 @@ from .base import BaseDataset
class hellaswagDataset(BaseDataset): class hellaswagDataset(BaseDataset):
@staticmethod @staticmethod
def load(**kwargs): def load(path):
dataset = load_dataset(**kwargs) dataset = []
with open(path, 'r', encoding='utf-8') as f:
def preprocess(example): for line in f:
for i in range(4): data = json.loads(line)
example[chr(ord('A') + i)] = example['endings'][i] dataset.append({
return example 'ctx': data['query'].split(': ', 2)[-1],
'A': data['choices'][0],
dataset = dataset.map(preprocess).remove_columns(['endings']) 'B': data['choices'][1],
'C': data['choices'][2],
'D': data['choices'][3],
'label': data['gold'],
})
dataset = Dataset.from_list(dataset)
return dataset return dataset
@ -27,19 +32,20 @@ class hellaswagDataset(BaseDataset):
class hellaswagDataset_V2(BaseDataset): class hellaswagDataset_V2(BaseDataset):
@staticmethod @staticmethod
def load(**kwargs): def load(path):
dataset = load_dataset(**kwargs) dataset = []
with open(path, 'r', encoding='utf-8') as f:
def preprocess(example): for line in f:
for i in range(4): data = json.loads(line)
example[chr(ord('A') + i)] = example['endings'][i] dataset.append({
if example['label']: 'ctx': data['query'].split(': ', 1)[-1],
example['label'] = 'ABCD'[int(example['label'])] 'A': data['choices'][0],
else: 'B': data['choices'][1],
example['label'] = 'NULL' 'C': data['choices'][2],
return example 'D': data['choices'][3],
'label': 'ABCD'[data['gold']],
dataset = dataset.map(preprocess).remove_columns(['endings']) })
dataset = Dataset.from_list(dataset)
return dataset return dataset

View File

@ -1,9 +1,27 @@
import json
import os.path as osp import os.path as osp
import re import re
import tempfile import tempfile
from typing import List from typing import List
from datasets import Dataset
from opencompass.openicl.icl_evaluator import BaseEvaluator from opencompass.openicl.icl_evaluator import BaseEvaluator
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class HumanevalDataset(BaseDataset):
@staticmethod
def load(path):
dataset = []
with open(path, 'r', encoding='utf-8') as f:
for line in f:
dataset.append(json.loads(line.strip()))
return Dataset.from_list(dataset)
class HumanEvaluator(BaseEvaluator): class HumanEvaluator(BaseEvaluator):

View File

@ -1,7 +1,8 @@
import json
import re import re
import string import string
from datasets import DatasetDict, load_dataset from datasets import Dataset, DatasetDict
from opencompass.openicl.icl_evaluator import BaseEvaluator from opencompass.openicl.icl_evaluator import BaseEvaluator
from opencompass.registry import ICL_EVALUATORS, LOAD_DATASET from opencompass.registry import ICL_EVALUATORS, LOAD_DATASET
@ -14,16 +15,12 @@ from .base import BaseDataset
class lambadaDataset(BaseDataset): class lambadaDataset(BaseDataset):
@staticmethod @staticmethod
def load(**kwargs): def load(path):
dataset = load_dataset(**kwargs, split='test') dataset = []
with open(path, 'r', encoding='utf-8') as f:
def preprocess(example): for line in f:
prompt, target = example['text'].strip().rsplit(' ', 1) dataset.append(json.loads(line))
example['prompt'] = prompt dataset = Dataset.from_list(dataset)
example['label'] = target
return example
dataset = dataset.map(preprocess)
return DatasetDict({'test': dataset}) return DatasetDict({'test': dataset})

View File

@ -1,4 +1,6 @@
from datasets import load_dataset import json
from datasets import Dataset
from opencompass.registry import LOAD_DATASET from opencompass.registry import LOAD_DATASET
@ -9,33 +11,46 @@ from .base import BaseDataset
class OBQADataset(BaseDataset): class OBQADataset(BaseDataset):
@staticmethod @staticmethod
def load(**kwargs): def load(path):
dataset = load_dataset(**kwargs) dataset_list = []
with open(path, 'r') as f:
def pre_process(example): for line in f:
for i in range(4): line = json.loads(line)
example[chr(ord('A') + i)] = example['choices']['text'][i] item = {
return example 'A': line['question']['choices'][0]['text'],
'B': line['question']['choices'][1]['text'],
dataset = dataset.map(pre_process).remove_columns(['id', 'choices']) 'C': line['question']['choices'][2]['text'],
return dataset 'D': line['question']['choices'][3]['text'],
'question_stem': line['question']['stem'],
'answerKey': line['answerKey'],
}
if 'fact1' in line:
item['fact1'] = line['fact1']
dataset_list.append(item)
return Dataset.from_list(dataset_list)
@LOAD_DATASET.register_module() @LOAD_DATASET.register_module()
class OBQADataset_V2(BaseDataset): class OBQADataset_V2(BaseDataset):
@staticmethod @staticmethod
def load(**kwargs): def load(path):
dataset = load_dataset(**kwargs) dataset_list = []
with open(path, 'r') as f:
def pre_process(example): for line in f:
example['A'] = example['choices']['text'][0] line = json.loads(line)
example['B'] = example['choices']['text'][1] question = line['question']['stem']
example['C'] = example['choices']['text'][2] if not question.endswith('?'):
example['D'] = example['choices']['text'][3] question += ' what?'
if not example['question_stem'].endswith('?'): item = {
example['question_stem'] += ' what?' 'A': line['question']['choices'][0]['text'],
return example 'B': line['question']['choices'][1]['text'],
'C': line['question']['choices'][2]['text'],
dataset = dataset.map(pre_process).remove_columns(['id', 'choices']) 'D': line['question']['choices'][3]['text'],
return dataset 'question_stem': question,
'answerKey': line['answerKey'],
}
if 'fact1' in line:
item['fact1'] = line['fact1']
dataset_list.append(item)
return Dataset.from_list(dataset_list)

View File

@ -1,50 +1,108 @@
from datasets import load_dataset import json
import os
from datasets import Dataset, DatasetDict
from opencompass.registry import LOAD_DATASET from opencompass.registry import LOAD_DATASET
from .base import BaseDataset from .base import BaseDataset
@LOAD_DATASET.register_module()
class piqaDataset(BaseDataset):
@staticmethod
def load_single(path, data_filename, label_filename):
data_path = os.path.join(path, data_filename)
label_path = os.path.join(path, label_filename)
dataset = []
with open(data_path, 'r', encoding='utf-8') as f:
data_lines = f.readlines()
with open(label_path, 'r', encoding='utf-8') as f:
label_lines = f.readlines()
assert len(data_lines) == len(label_lines)
for data, label in zip(data_lines, label_lines):
i = json.loads(data.strip())
i['label'] = int(label.strip())
dataset.append(i)
return Dataset.from_list(dataset)
@staticmethod
def load(path):
train_dataset = piqaDataset.load_single(path, 'train.jsonl',
'train-labels.lst')
val_dataset = piqaDataset.load_single(path, 'dev.jsonl',
'dev-labels.lst')
return DatasetDict({'train': train_dataset, 'validation': val_dataset})
@LOAD_DATASET.register_module() @LOAD_DATASET.register_module()
class piqaDataset_V2(BaseDataset): class piqaDataset_V2(BaseDataset):
@staticmethod @staticmethod
def load(**kwargs): def load_single(path, data_filename, label_filename):
dataset = load_dataset(**kwargs) data_path = os.path.join(path, data_filename)
label_path = os.path.join(path, label_filename)
def preprocess(example): dataset = []
assert isinstance(example['label'], int) with open(data_path, 'r', encoding='utf-8') as f:
if example['label'] < 0: data_lines = f.readlines()
example['answer'] = 'NULL' with open(label_path, 'r', encoding='utf-8') as f:
label_lines = f.readlines()
assert len(data_lines) == len(label_lines)
for data, label in zip(data_lines, label_lines):
i = json.loads(data.strip())
label = int(label.strip())
if label < 0:
i['answer'] = 'NULL'
else: else:
example['answer'] = 'AB'[example['label']] i['answer'] = 'AB'[label]
example.pop('label') dataset.append(i)
return example
dataset = dataset.map(preprocess) return Dataset.from_list(dataset)
return dataset
@staticmethod
def load(path):
train_dataset = piqaDataset_V2.load_single(path, 'train.jsonl',
'train-labels.lst')
val_dataset = piqaDataset_V2.load_single(path, 'dev.jsonl',
'dev-labels.lst')
return DatasetDict({'train': train_dataset, 'validation': val_dataset})
@LOAD_DATASET.register_module() @LOAD_DATASET.register_module()
class piqaDataset_V3(BaseDataset): class piqaDataset_V3(BaseDataset):
@staticmethod @staticmethod
def load(**kwargs): def load_single(path, data_filename, label_filename):
dataset = load_dataset(**kwargs) data_path = os.path.join(path, data_filename)
label_path = os.path.join(path, label_filename)
def preprocess(example): dataset = []
example['goal'] = example['goal'][0].upper() + example['goal'][1:] with open(data_path, 'r', encoding='utf-8') as f:
if example['goal'].endswith('?') or example['goal'].endswith('.'): data_lines = f.readlines()
example['sol1'] = example['sol1'][0].upper( with open(label_path, 'r', encoding='utf-8') as f:
) + example['sol1'][1:] label_lines = f.readlines()
example['sol2'] = example['sol2'][0].upper( assert len(data_lines) == len(label_lines)
) + example['sol2'][1:] for data, label in zip(data_lines, label_lines):
i = json.loads(data.strip())
i['label'] = int(label.strip())
# some preprocessing
i['goal'] = i['goal'][0].upper() + i['goal'][1:]
if i['goal'].endswith('?') or i['goal'].endswith('.'):
i['sol1'] = i['sol1'][0].upper() + i['sol1'][1:]
i['sol2'] = i['sol2'][0].upper() + i['sol2'][1:]
else: else:
example['sol1'] = example['sol1'][0].lower( i['sol1'] = i['sol1'][0].lower() + i['sol1'][1:]
) + example['sol1'][1:] i['sol2'] = i['sol2'][0].lower() + i['sol2'][1:]
example['sol2'] = example['sol2'][0].lower(
) + example['sol2'][1:]
return example
dataset = dataset.map(preprocess) dataset.append(i)
return dataset
return Dataset.from_list(dataset)
@staticmethod
def load(path):
train_dataset = piqaDataset_V3.load_single(path, 'train.jsonl',
'train-labels.lst')
val_dataset = piqaDataset_V3.load_single(path, 'dev.jsonl',
'dev-labels.lst')
return DatasetDict({'train': train_dataset, 'validation': val_dataset})

View File

@ -1,4 +1,7 @@
from datasets import load_dataset import json
import os
from datasets import Dataset, DatasetDict
from opencompass.registry import LOAD_DATASET from opencompass.registry import LOAD_DATASET
@ -10,12 +13,21 @@ class RaceDataset(BaseDataset):
@staticmethod @staticmethod
def load(path: str, name: str): def load(path: str, name: str):
dataset = load_dataset(path, name) dataset = {}
for split in ['validation', 'test']:
def preprocess(x): jsonl_path = os.path.join(path, split, f'{name}.jsonl')
for ans, option in zip(['A', 'B', 'C', 'D'], x['options']): dataset_list = []
x[ans] = option with open(jsonl_path, 'r', encoding='utf-8') as f:
del x['options'] for line in f:
return x line = json.loads(line)
dataset_list.append({
return dataset.map(preprocess) 'article': line['article'],
'question': line['question'],
'A': line['options'][0],
'B': line['options'][1],
'C': line['options'][2],
'D': line['options'][3],
'answer': line['answer'],
})
dataset[split] = Dataset.from_list(dataset_list)
return DatasetDict(dataset)

View File

@ -1,4 +1,7 @@
from datasets import load_dataset import json
import os
from datasets import Dataset, DatasetDict
from opencompass.registry import LOAD_DATASET from opencompass.registry import LOAD_DATASET
@ -6,24 +9,72 @@ from .base import BaseDataset
@LOAD_DATASET.register_module() @LOAD_DATASET.register_module()
class siqaDataset_V2(BaseDataset): class siqaDataset(BaseDataset):
"""Disconnect from HuggingFace version of HFDataset."""
@staticmethod @staticmethod
def load(**kwargs): def load_single(path, data_filename, label_filename):
dataset = load_dataset(**kwargs) data_path = os.path.join(path, data_filename)
label_path = os.path.join(path, label_filename)
dataset = []
with open(data_path, 'r', encoding='utf-8') as f:
data_lines = f.readlines()
with open(label_path, 'r', encoding='utf-8') as f:
label_lines = f.readlines()
assert len(data_lines) == len(label_lines)
for data, label in zip(data_lines, label_lines):
i = json.loads(data.strip())
i['label'] = int(label.strip())
dataset.append(i)
def preprocess(example): return Dataset.from_list(dataset)
example['all_labels'] = {
@staticmethod
def load(path):
train_dataset = siqaDataset.load_single(path, 'train.jsonl',
'train-labels.lst')
val_dataset = siqaDataset.load_single(path, 'dev.jsonl',
'dev-labels.lst')
return DatasetDict({'train': train_dataset, 'validation': val_dataset})
@LOAD_DATASET.register_module()
class siqaDataset_V2(BaseDataset):
"""Disconnect from HuggingFace version of siqaDataset_V2."""
@staticmethod
def load_single(path, data_filename, label_filename):
data_path = os.path.join(path, data_filename)
label_path = os.path.join(path, label_filename)
dataset = []
with open(data_path, 'r', encoding='utf-8') as f:
data_lines = f.readlines()
with open(label_path, 'r', encoding='utf-8') as f:
label_lines = f.readlines()
assert len(data_lines) == len(label_lines)
for data, label in zip(data_lines, label_lines):
i = json.loads(data.strip())
label = int(label.strip())
# some preprocessing
i['all_labels'] = {
'candidates': [ 'candidates': [
f'A. {example["answerA"]}', [f'A. {i["answerA"]}', 'A', i['answerA']],
f'B. {example["answerB"]}', [f'B. {i["answerB"]}', 'B', i['answerB']],
f'C. {example["answerC"]}', [f'C. {i["answerC"]}', 'C', i['answerC']],
], ],
'label': 'label':
int(example['label']) - 1 label - 1
} }
example['label'] = ' ABC'[int(example['label'])] i['label'] = ' ABC'[label]
return example
dataset = dataset.map(preprocess) dataset.append(i)
return dataset
return Dataset.from_list(dataset)
@staticmethod
def load(path):
train_dataset = siqaDataset_V2.load_single(path, 'train.jsonl',
'train-labels.lst')
val_dataset = siqaDataset_V2.load_single(path, 'dev.jsonl',
'dev-labels.lst')
return DatasetDict({'train': train_dataset, 'validation': val_dataset})

View File

@ -1,4 +1,7 @@
from datasets import DatasetDict, load_dataset import json
import os
from datasets import Dataset, DatasetDict
from opencompass.registry import LOAD_DATASET from opencompass.registry import LOAD_DATASET
@ -9,38 +12,39 @@ from .base import BaseDataset
class storyclozeDataset(BaseDataset): class storyclozeDataset(BaseDataset):
@staticmethod @staticmethod
def load(**kwargs): def load(path, lang):
# special process dataset_list = []
dataset = load_dataset(**kwargs, split='train+eval') for split in ['train', 'eval']:
split_path = os.path.join(path, f'{lang}_{split}.jsonl')
def preprocess(example): with open(split_path, 'r', encoding='utf-8') as f:
example['context'] = ' '.join([ for line in f:
example['input_sentence_1'], example['input_sentence_2'], line = json.loads(line)
example['input_sentence_3'], example['input_sentence_4'] line['context'] = ' '.join([
]) line['input_sentence_1'], line['input_sentence_2'],
return example line['input_sentence_3'], line['input_sentence_4']
])
dataset = dataset.map(preprocess) dataset_list.append(line)
dataset_list = Dataset.from_list(dataset_list)
return DatasetDict({'test': dataset}) return DatasetDict({'test': dataset_list})
@LOAD_DATASET.register_module() @LOAD_DATASET.register_module()
class storyclozeDataset_V2(BaseDataset): class storyclozeDataset_V2(BaseDataset):
@staticmethod @staticmethod
def load(**kwargs): def load(path, lang):
# special process dataset_list = []
dataset = load_dataset(**kwargs, split='train+eval') for split in ['train', 'eval']:
split_path = os.path.join(path, f'{lang}_{split}.jsonl')
def preprocess(example): with open(split_path, 'r', encoding='utf-8') as f:
example['context'] = ' '.join([ for line in f:
example['input_sentence_1'], example['input_sentence_2'], line = json.loads(line)
example['input_sentence_3'], example['input_sentence_4'] line['context'] = ' '.join([
]) line['input_sentence_1'], line['input_sentence_2'],
example['answer_right_ending'] = ' AB'[ line['input_sentence_3'], line['input_sentence_4']
example['answer_right_ending']] ])
return example line['answer_right_ending'] = ' AB'[
line['answer_right_ending']]
dataset = dataset.map(preprocess) dataset_list.append(line)
return dataset dataset_list = Dataset.from_list(dataset_list)
return dataset_list

View File

@ -1,6 +1,11 @@
import json
import re import re
from opencompass.registry import TEXT_POSTPROCESSORS from datasets import Dataset
from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS
from .base import BaseDataset
@TEXT_POSTPROCESSORS.register_module('strategyqa') @TEXT_POSTPROCESSORS.register_module('strategyqa')
@ -16,3 +21,13 @@ def strategyqa_pred_postprocess(text: str) -> str:
@TEXT_POSTPROCESSORS.register_module('strategyqa_dataset') @TEXT_POSTPROCESSORS.register_module('strategyqa_dataset')
def strategyqa_dataset_postprocess(text: str) -> str: def strategyqa_dataset_postprocess(text: str) -> str:
return 'yes' if str(text) == 'True' else 'no' return 'yes' if str(text) == 'True' else 'no'
@LOAD_DATASET.register_module()
class StrategyQADataset(BaseDataset):
@staticmethod
def load(path):
with open(path, 'r', encoding='utf-8') as f:
dataset = json.load(f)
return Dataset.from_list(dataset)

View File

@ -1,7 +1,9 @@
import json
import os
import re import re
from collections import Counter from collections import Counter
from datasets import load_dataset from datasets import Dataset
from opencompass.openicl.icl_evaluator import BaseEvaluator from opencompass.openicl.icl_evaluator import BaseEvaluator
from opencompass.utils.text_postprocessors import general_postprocess from opencompass.utils.text_postprocessors import general_postprocess
@ -12,15 +14,16 @@ from .base import BaseDataset
class TydiQADataset(BaseDataset): class TydiQADataset(BaseDataset):
@staticmethod @staticmethod
def load(**kwargs): def load(path, lang):
dataset = load_dataset(**kwargs) path = os.path.join(path, 'dev', f'{lang}-dev.jsonl')
dataset_list = []
def pre_process(example): with open(path, 'r', encoding='utf-8') as f:
example['answer'] = example['answers']['text'] for line in f:
return example line = json.loads(line)
answer = list(set([i['text'] for i in line['answers']]))
dataset = dataset.map(pre_process).remove_columns(['id', 'answers']) line['answer'] = answer
return dataset dataset_list.append(line)
return Dataset.from_list(dataset_list)
class TydiQAEvaluator(BaseEvaluator): class TydiQAEvaluator(BaseEvaluator):

View File

@ -1,4 +1,7 @@
from datasets import load_dataset import json
import os
from datasets import Dataset
from opencompass.registry import LOAD_DATASET from opencompass.registry import LOAD_DATASET
@ -7,38 +10,49 @@ from .base import BaseDataset
@LOAD_DATASET.register_module() @LOAD_DATASET.register_module()
class winograndeDataset(BaseDataset): class winograndeDataset(BaseDataset):
"""Disconnect from Huggingface, winograndeDataset."""
@staticmethod @staticmethod
def load(**kwargs): def load(path):
path = os.path.join(path, 'dev.jsonl')
dataset = load_dataset(**kwargs) dataset_list = []
with open(path, 'r', encoding='utf-8') as f:
def preprocess(example): for line in f:
prompt = example.pop('sentence') line = json.loads(line)
example['opt1'] = prompt.replace('_', example.pop('option1')) prompt = line['sentence']
example['opt2'] = prompt.replace('_', example.pop('option2')) dataset_list.append({
return example 'opt1':
prompt.replace('_', line['option1']),
return dataset.map(preprocess) 'opt2':
prompt.replace('_', line['option2']),
'answer':
line['answer']
})
dataset_list = Dataset.from_list(dataset_list)
return dataset_list
@LOAD_DATASET.register_module() @LOAD_DATASET.register_module()
class winograndeDataset_V2(BaseDataset): class winograndeDataset_V2(BaseDataset):
"""Disconnect from Huggingface, winograndeDataset_V2."""
@staticmethod @staticmethod
def load(**kwargs): def load(path):
path = os.path.join(path, 'dev.jsonl')
dataset = load_dataset(**kwargs) dataset_list = []
with open(path, 'r', encoding='utf-8') as f:
def preprocess(example): for line in f:
prompt = example.pop('sentence') line = json.loads(line)
example['opt1'] = prompt.replace('_', example.pop('option1')) prompt = line['sentence']
example['opt2'] = prompt.replace('_', example.pop('option2')) answer = line['answer']
answer = example.pop('answer') answer = ' AB'[int(answer)] if answer != '' else 'NULL'
if answer == '': dataset_list.append({
example['label'] = 'NULL' 'opt1':
else: prompt.replace('_', line['option1']),
example['label'] = ' AB'[int(answer)] 'opt2':
return example prompt.replace('_', line['option2']),
'answer':
return dataset.map(preprocess) answer
})
dataset_list = Dataset.from_list(dataset_list)
return dataset_list