mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Feature] Use dataset in local path (#570)
* update commonsenseqa * update drop * update flores_first100 * update gsm8k * update humaneval * update lambda * update obqa * update piqa * update race * update siqa * update story_cloze * update strategyqa * update tydiqa * update winogrande * update doc * update hellaswag * fix obqa * update collections * update .zip name
This commit is contained in:
parent
d6aaac22e7
commit
689ffe5b63
@ -83,8 +83,8 @@ git clone https://github.com/open-compass/opencompass opencompass
|
||||
cd opencompass
|
||||
pip install -e .
|
||||
# Download dataset to data/ folder
|
||||
wget https://github.com/open-compass/opencompass/releases/download/0.1.1/OpenCompassData.zip
|
||||
unzip OpenCompassData.zip
|
||||
wget https://github.com/open-compass/opencompass/releases/download/0.1.8.rc1/OpenCompassData-core-20231110.zip
|
||||
unzip OpenCompassData-core-20231110.zip
|
||||
```
|
||||
|
||||
Some third-party features, such as Humaneval and Llama, may require additional steps to work properly. For detailed steps, please refer to the [Installation Guide](https://opencompass.readthedocs.io/en/latest/get_started/installation.html).
|
||||
|
@ -85,8 +85,8 @@ git clone https://github.com/open-compass/opencompass opencompass
|
||||
cd opencompass
|
||||
pip install -e .
|
||||
# 下载数据集到 data/ 处
|
||||
wget https://github.com/open-compass/opencompass/releases/download/0.1.1/OpenCompassData.zip
|
||||
unzip OpenCompassData.zip
|
||||
wget https://github.com/open-compass/opencompass/releases/download/0.1.8.rc1/OpenCompassData-core-20231110.zip
|
||||
unzip OpenCompassData-core-20231110.zip
|
||||
```
|
||||
|
||||
有部分第三方功能,如 Humaneval 以及 Llama,可能需要额外步骤才能正常运行,详细步骤请参考[安装指南](https://opencompass.readthedocs.io/zh_CN/latest/get_started/installation.html)。
|
||||
|
@ -52,10 +52,5 @@ with read_base():
|
||||
from ..nq.nq_gen_c788f6 import nq_datasets
|
||||
from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
|
||||
from ..flores.flores_gen_806ede import flores_datasets
|
||||
from ..crowspairs.crowspairs_ppl_e811e1 import crowspairs_datasets
|
||||
from ..civilcomments.civilcomments_clp_a3c5fd import civilcomments_datasets
|
||||
from ..jigsawmultilingual.jigsawmultilingual_clp_fe50d8 import jigsawmultilingual_datasets
|
||||
from ..realtoxicprompts.realtoxicprompts_gen_7605e4 import realtoxicprompts_datasets
|
||||
from ..truthfulqa.truthfulqa_gen_5ddc62 import truthfulqa_datasets
|
||||
|
||||
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
||||
|
@ -52,6 +52,5 @@ with read_base():
|
||||
from ..nq.nq_gen_0356ec import nq_datasets
|
||||
from ..triviaqa.triviaqa_gen_0356ec import triviaqa_datasets
|
||||
from ..flores.flores_gen_806ede import flores_datasets
|
||||
from ..crowspairs.crowspairs_ppl_e811e1 import crowspairs_datasets
|
||||
|
||||
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
||||
|
@ -34,6 +34,5 @@ with read_base():
|
||||
from ..obqa.obqa_ppl_c7c154 import obqa_datasets
|
||||
from ..nq.nq_gen_c788f6 import nq_datasets
|
||||
from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
|
||||
from ..crowspairs.crowspairs_ppl_e811e1 import crowspairs_datasets
|
||||
|
||||
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
||||
|
@ -52,10 +52,5 @@ with read_base():
|
||||
from ..nq.nq_gen_c788f6 import nq_datasets
|
||||
from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
|
||||
from ..flores.flores_gen_806ede import flores_datasets
|
||||
from ..crowspairs.crowspairs_gen_381af0 import crowspairs_datasets
|
||||
from ..civilcomments.civilcomments_clp_a3c5fd import civilcomments_datasets
|
||||
from ..jigsawmultilingual.jigsawmultilingual_clp_fe50d8 import jigsawmultilingual_datasets
|
||||
from ..realtoxicprompts.realtoxicprompts_gen_7605e4 import realtoxicprompts_datasets
|
||||
from ..truthfulqa.truthfulqa_gen_5ddc62 import truthfulqa_datasets
|
||||
|
||||
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
||||
|
@ -35,6 +35,5 @@ with read_base():
|
||||
from ..obqa.obqa_gen_9069e4 import obqa_datasets
|
||||
from ..nq.nq_gen_c788f6 import nq_datasets
|
||||
from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
|
||||
from ..crowspairs.crowspairs_gen_381af0 import crowspairs_datasets
|
||||
|
||||
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
||||
|
@ -50,8 +50,9 @@ commonsenseqa_eval_cfg = dict(
|
||||
|
||||
commonsenseqa_datasets = [
|
||||
dict(
|
||||
abbr='commonsense_qa',
|
||||
type=commonsenseqaDataset,
|
||||
path="commonsense_qa",
|
||||
path='./data/commonsenseqa',
|
||||
reader_cfg=commonsenseqa_reader_cfg,
|
||||
infer_cfg=commonsenseqa_infer_cfg,
|
||||
eval_cfg=commonsenseqa_eval_cfg,
|
||||
|
@ -45,8 +45,9 @@ commonsenseqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
|
||||
|
||||
commonsenseqa_datasets = [
|
||||
dict(
|
||||
abbr='commonsense_qa',
|
||||
type=commonsenseqaDataset,
|
||||
path='commonsense_qa',
|
||||
path='./data/commonsenseqa',
|
||||
reader_cfg=commonsenseqa_reader_cfg,
|
||||
infer_cfg=commonsenseqa_infer_cfg,
|
||||
eval_cfg=commonsenseqa_eval_cfg)
|
||||
|
@ -40,11 +40,10 @@ commonsenseqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
|
||||
|
||||
commonsenseqa_datasets = [
|
||||
dict(
|
||||
abbr='commonsense_qa',
|
||||
type=commonsenseqaDataset,
|
||||
path='commonsense_qa',
|
||||
path='./data/commonsenseqa',
|
||||
reader_cfg=commonsenseqa_reader_cfg,
|
||||
infer_cfg=commonsenseqa_infer_cfg,
|
||||
eval_cfg=commonsenseqa_eval_cfg)
|
||||
]
|
||||
|
||||
del _ice_template
|
||||
|
@ -4,6 +4,11 @@ from opencompass.openicl.icl_inferencer import PPLInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import commonsenseqaDataset
|
||||
|
||||
commonsenseqa_reader_cfg = dict(
|
||||
input_columns=['question', 'A', 'B', 'C', 'D', 'E'],
|
||||
output_column='answerKey',
|
||||
test_split='validation')
|
||||
|
||||
_ice_template = dict(
|
||||
type=PromptTemplate,
|
||||
template={
|
||||
@ -31,15 +36,10 @@ commonsenseqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
|
||||
|
||||
commonsenseqa_datasets = [
|
||||
dict(
|
||||
abbr='commonsense_qa',
|
||||
type=commonsenseqaDataset,
|
||||
path='commonsense_qa',
|
||||
reader_cfg=dict(
|
||||
input_columns=['question', 'A', 'B', 'C', 'D', 'E'],
|
||||
output_column='answerKey',
|
||||
test_split='validation',
|
||||
),
|
||||
path='./data/commonsenseqa',
|
||||
reader_cfg=commonsenseqa_reader_cfg,
|
||||
infer_cfg=commonsenseqa_infer_cfg,
|
||||
eval_cfg=commonsenseqa_eval_cfg)
|
||||
]
|
||||
|
||||
del _ice_template
|
||||
|
@ -4,11 +4,18 @@ from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import EMEvaluator
|
||||
from opencompass.datasets import dropDataset
|
||||
|
||||
drop_reader_cfg = dict(
|
||||
input_columns=['prompt', 'question'],
|
||||
output_column='answers',
|
||||
train_split='validation',
|
||||
test_split='validation',
|
||||
)
|
||||
|
||||
drop_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=
|
||||
'''Text: In the county, the population was spread out with 23.50% under the age of 18, 8.70% from 18 to 24, 29.70% from 25 to 44, 24.70% from 45 to 64, and 13.30% who were 65 years of age or older.
|
||||
template='''\
|
||||
Text: In the county, the population was spread out with 23.50% under the age of 18, 8.70% from 18 to 24, 29.70% from 25 to 44, 24.70% from 45 to 64, and 13.30% who were 65 years of age or older.
|
||||
Question: How many more percent are under the age of 18 compared to the 18 to 24 group?
|
||||
Anawer: According to the text, 23.5% are under the age of 18, and 8.7% are from ages 18 to 24. 23.5%-8.7%=14.8%. So the answer is 14.8.
|
||||
|
||||
@ -30,13 +37,8 @@ drop_datasets = [
|
||||
dict(
|
||||
abbr='drop',
|
||||
type=dropDataset,
|
||||
path='drop',
|
||||
reader_cfg=dict(
|
||||
input_columns=['prompt'],
|
||||
output_column='answers',
|
||||
train_split='validation',
|
||||
test_split='validation',
|
||||
),
|
||||
path='./data/drop/drop_dataset_dev.json',
|
||||
reader_cfg=drop_reader_cfg,
|
||||
infer_cfg=drop_infer_cfg,
|
||||
eval_cfg=drop_eval_cfg)
|
||||
]
|
||||
|
@ -118,6 +118,12 @@ for _flores_subtask in _flores_subtasks:
|
||||
_, _flores_source, _src_inst, _ = flores_lang_map[_src]
|
||||
_, _flores_target, _tgt_inst, _ = flores_lang_map[_tgt]
|
||||
|
||||
flores_reader_cfg = dict(
|
||||
input_columns=f"sentence_{_flores_source}",
|
||||
output_column=f"sentence_{_flores_target}",
|
||||
train_split="dev",
|
||||
test_split="devtest"
|
||||
)
|
||||
flores_infer_cfg = dict(
|
||||
ice_template=dict(
|
||||
type=PromptTemplate,
|
||||
@ -146,16 +152,11 @@ for _flores_subtask in _flores_subtasks:
|
||||
flores_eval_cfg["dataset_postprocessor"] = dict(type="flores")
|
||||
flores_datasets.append(
|
||||
dict(
|
||||
type=FloresFirst100Dataset,
|
||||
abbr=f"flores_100_{_src}-{_tgt}",
|
||||
type=FloresFirst100Dataset,
|
||||
path='./data/flores_first100',
|
||||
name=f"{_flores_source}-{_flores_target}",
|
||||
reader_cfg=dict(
|
||||
input_columns=f"sentence_{_flores_source}",
|
||||
output_column=f"sentence_{_flores_target}",
|
||||
train_split="dev",
|
||||
test_split="devtest"),
|
||||
reader_cfg=flores_reader_cfg.copy(),
|
||||
infer_cfg=flores_infer_cfg.copy(),
|
||||
eval_cfg=flores_eval_cfg.copy(),
|
||||
))
|
||||
|
||||
del _flores_lang_map, _flores_subtask, _src, _tgt, _, _flores_source, _src_inst, _flores_target, _tgt_inst
|
||||
|
@ -118,6 +118,12 @@ for _flores_subtask in _flores_subtasks:
|
||||
_, _flores_source, _src_inst, _ = flores_lang_map[_src]
|
||||
_, _flores_target, _tgt_inst, _ = flores_lang_map[_tgt]
|
||||
|
||||
flores_reader_cfg = dict(
|
||||
input_columns=f"sentence_{_flores_source}",
|
||||
output_column=f"sentence_{_flores_target}",
|
||||
train_split="dev",
|
||||
test_split="devtest"
|
||||
)
|
||||
flores_infer_cfg = dict(
|
||||
ice_template=dict(
|
||||
type=PromptTemplate,
|
||||
@ -139,16 +145,11 @@ for _flores_subtask in _flores_subtasks:
|
||||
flores_eval_cfg["dataset_postprocessor"] = dict(type="flores-chinese")
|
||||
flores_datasets.append(
|
||||
dict(
|
||||
type=FloresFirst100Dataset,
|
||||
abbr=f"flores_100_{_src}-{_tgt}",
|
||||
type=FloresFirst100Dataset,
|
||||
path='./data/flores_first100',
|
||||
name=f"{_flores_source}-{_flores_target}",
|
||||
reader_cfg=dict(
|
||||
input_columns=f"sentence_{_flores_source}",
|
||||
output_column=f"sentence_{_flores_target}",
|
||||
train_split="dev",
|
||||
test_split="devtest"),
|
||||
reader_cfg=flores_reader_cfg.copy(),
|
||||
infer_cfg=flores_infer_cfg.copy(),
|
||||
eval_cfg=flores_eval_cfg.copy(),
|
||||
))
|
||||
|
||||
del _flores_lang_map, _flores_subtask, _src, _tgt, _, _flores_source, _src_inst, _flores_target, _tgt_inst
|
||||
|
@ -1,8 +1,7 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import HFDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator
|
||||
from opencompass.datasets import GSM8KDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator
|
||||
|
||||
gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')
|
||||
|
||||
@ -32,9 +31,8 @@ gsm8k_eval_cfg = dict(evaluator=dict(type=Gsm8kEvaluator),
|
||||
gsm8k_datasets = [
|
||||
dict(
|
||||
abbr='gsm8k',
|
||||
type=HFDataset,
|
||||
path='gsm8k',
|
||||
name='main',
|
||||
type=GSM8KDataset,
|
||||
path='./data/gsm8k',
|
||||
reader_cfg=gsm8k_reader_cfg,
|
||||
infer_cfg=gsm8k_infer_cfg,
|
||||
eval_cfg=gsm8k_eval_cfg)
|
||||
|
@ -1,8 +1,7 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import HFDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator
|
||||
from opencompass.datasets import GSM8KDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator
|
||||
|
||||
gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')
|
||||
|
||||
@ -79,9 +78,8 @@ gsm8k_eval_cfg = dict(
|
||||
gsm8k_datasets = [
|
||||
dict(
|
||||
abbr='gsm8k',
|
||||
type=HFDataset,
|
||||
path='gsm8k',
|
||||
name='main',
|
||||
type=GSM8KDataset,
|
||||
path='./data/gsm8k',
|
||||
reader_cfg=gsm8k_reader_cfg,
|
||||
infer_cfg=gsm8k_infer_cfg,
|
||||
eval_cfg=gsm8k_eval_cfg)
|
||||
|
@ -1,7 +1,7 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import AgentInferencer
|
||||
from opencompass.datasets import HFDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kAgentEvaluator
|
||||
from opencompass.datasets import GSM8KDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kAgentEvaluator
|
||||
|
||||
# This config is for code interpreter
|
||||
gsm8k_example = """
|
||||
@ -76,9 +76,8 @@ gsm8k_eval_cfg = dict(
|
||||
gsm8k_datasets = [
|
||||
dict(
|
||||
abbr='gsm8k',
|
||||
type=HFDataset,
|
||||
path='gsm8k',
|
||||
name='main',
|
||||
type=GSM8KDataset,
|
||||
path='./data/gsm8k',
|
||||
reader_cfg=gsm8k_reader_cfg,
|
||||
infer_cfg=gsm8k_infer_cfg,
|
||||
eval_cfg=gsm8k_eval_cfg)
|
||||
|
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import SCInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import HFDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator
|
||||
from opencompass.datasets import GSM8KDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator
|
||||
|
||||
gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer' )
|
||||
generation_kwargs = dict(do_sample=True, temperature=0.7, top_k=40)
|
||||
@ -81,9 +81,8 @@ gsm8k_eval_cfg = dict(
|
||||
gsm8k_datasets = [
|
||||
dict(
|
||||
abbr='gsm8k',
|
||||
type=HFDataset,
|
||||
path='gsm8k',
|
||||
name='main',
|
||||
type=GSM8KDataset,
|
||||
path='./data/gsm8k',
|
||||
reader_cfg=gsm8k_reader_cfg,
|
||||
infer_cfg=gsm8k_infer_cfg,
|
||||
eval_cfg=gsm8k_eval_cfg)
|
||||
|
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import HFDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator
|
||||
from opencompass.datasets import GSM8KDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator
|
||||
|
||||
gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')
|
||||
|
||||
@ -41,9 +41,9 @@ gsm8k_eval_cfg = dict(evaluator=dict(type=Gsm8kEvaluator),
|
||||
|
||||
gsm8k_datasets = [
|
||||
dict(
|
||||
type=HFDataset,
|
||||
path='gsm8k',
|
||||
name='main',
|
||||
abbr='gsm8k',
|
||||
type=GSM8KDataset,
|
||||
path='./data/gsm8k',
|
||||
reader_cfg=gsm8k_reader_cfg,
|
||||
infer_cfg=gsm8k_infer_cfg,
|
||||
eval_cfg=gsm8k_eval_cfg)
|
||||
|
@ -8,7 +8,7 @@ from opencompass.utils.text_postprocessors import first_option_postprocess
|
||||
hellaswag_reader_cfg = dict(
|
||||
input_columns=["ctx", "A", "B", "C", "D"],
|
||||
output_column="label",
|
||||
test_split="validation")
|
||||
)
|
||||
|
||||
hellaswag_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
@ -35,8 +35,9 @@ hellaswag_eval_cfg = dict(
|
||||
|
||||
hellaswag_datasets = [
|
||||
dict(
|
||||
abbr='hellaswag',
|
||||
type=hellaswagDataset_V2,
|
||||
path="hellaswag",
|
||||
path='./data/hellaswag/hellaswag.jsonl',
|
||||
reader_cfg=hellaswag_reader_cfg,
|
||||
infer_cfg=hellaswag_infer_cfg,
|
||||
eval_cfg=hellaswag_eval_cfg)
|
||||
|
@ -27,8 +27,9 @@ hellaswag_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
|
||||
|
||||
hellaswag_datasets = [
|
||||
dict(
|
||||
abbr='hellaswag',
|
||||
type=hellaswagDataset,
|
||||
path='hellaswag',
|
||||
path='./data/hellaswag/hellaswag.jsonl',
|
||||
reader_cfg=hellaswag_reader_cfg,
|
||||
infer_cfg=hellaswag_infer_cfg,
|
||||
eval_cfg=hellaswag_eval_cfg)
|
||||
|
@ -6,9 +6,8 @@ from opencompass.datasets import hellaswagDataset
|
||||
|
||||
hellaswag_reader_cfg = dict(
|
||||
input_columns=['ctx', 'A', 'B', 'C', 'D'],
|
||||
output_column='label',
|
||||
train_split='validation',
|
||||
test_split='validation')
|
||||
output_column='label'
|
||||
)
|
||||
|
||||
hellaswag_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
@ -26,8 +25,9 @@ hellaswag_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
|
||||
|
||||
hellaswag_datasets = [
|
||||
dict(
|
||||
abbr='hellaswag',
|
||||
type=hellaswagDataset,
|
||||
path='hellaswag',
|
||||
path='./data/hellaswag/hellaswag.jsonl',
|
||||
reader_cfg=hellaswag_reader_cfg,
|
||||
infer_cfg=hellaswag_infer_cfg,
|
||||
eval_cfg=hellaswag_eval_cfg)
|
||||
|
@ -1,7 +1,7 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess
|
||||
from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess
|
||||
|
||||
humaneval_reader_cfg = dict(
|
||||
input_columns=['prompt'], output_column='task_id', train_split='test')
|
||||
@ -32,8 +32,9 @@ humaneval_eval_cfg = dict(
|
||||
|
||||
humaneval_datasets = [
|
||||
dict(
|
||||
type=HFDataset,
|
||||
path='openai_humaneval',
|
||||
abbr='openai_humaneval',
|
||||
type=HumanevalDataset,
|
||||
path='./data/humaneval/human-eval-v2-20210705.jsonl',
|
||||
reader_cfg=humaneval_reader_cfg,
|
||||
infer_cfg=humaneval_infer_cfg,
|
||||
eval_cfg=humaneval_eval_cfg)
|
||||
|
@ -1,7 +1,7 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess
|
||||
from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess
|
||||
|
||||
humaneval_reader_cfg = dict(
|
||||
input_columns=['prompt'], output_column='task_id', train_split='test')
|
||||
@ -27,8 +27,9 @@ humaneval_eval_cfg = dict(
|
||||
|
||||
humaneval_datasets = [
|
||||
dict(
|
||||
type=HFDataset,
|
||||
path='openai_humaneval',
|
||||
abbr='openai_humaneval',
|
||||
type=HumanevalDataset,
|
||||
path='./data/humaneval/human-eval-v2-20210705.jsonl',
|
||||
reader_cfg=humaneval_reader_cfg,
|
||||
infer_cfg=humaneval_infer_cfg,
|
||||
eval_cfg=humaneval_eval_cfg)
|
||||
|
@ -1,7 +1,7 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess
|
||||
from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess
|
||||
|
||||
humaneval_reader_cfg = dict(
|
||||
input_columns=['prompt'], output_column='task_id', train_split='test')
|
||||
@ -27,8 +27,9 @@ humaneval_eval_cfg = dict(
|
||||
|
||||
humaneval_datasets = [
|
||||
dict(
|
||||
type=HFDataset,
|
||||
path='openai_humaneval',
|
||||
abbr='openai_humaneval',
|
||||
type=HumanevalDataset,
|
||||
path='./data/humaneval/human-eval-v2-20210705.jsonl',
|
||||
reader_cfg=humaneval_reader_cfg,
|
||||
infer_cfg=humaneval_infer_cfg,
|
||||
eval_cfg=humaneval_eval_cfg)
|
||||
|
@ -1,7 +1,7 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess
|
||||
from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess
|
||||
|
||||
humaneval_reader_cfg = dict(
|
||||
input_columns=['prompt'], output_column='task_id', train_split='test')
|
||||
@ -22,8 +22,9 @@ humaneval_eval_cfg = dict(
|
||||
|
||||
humaneval_datasets = [
|
||||
dict(
|
||||
type=HFDataset,
|
||||
path='openai_humaneval',
|
||||
abbr='openai_humaneval',
|
||||
type=HumanevalDataset,
|
||||
path='./data/humaneval/human-eval-v2-20210705.jsonl',
|
||||
reader_cfg=humaneval_reader_cfg,
|
||||
infer_cfg=humaneval_infer_cfg,
|
||||
eval_cfg=humaneval_eval_cfg)
|
||||
|
@ -1,7 +1,7 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess
|
||||
from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess
|
||||
|
||||
humaneval_reader_cfg = dict(
|
||||
input_columns=['prompt'], output_column='task_id', train_split='test')
|
||||
@ -32,8 +32,9 @@ humaneval_eval_cfg = dict(
|
||||
|
||||
humaneval_datasets = [
|
||||
dict(
|
||||
type=HFDataset,
|
||||
path='openai_humaneval',
|
||||
abbr='openai_humaneval',
|
||||
type=HumanevalDataset,
|
||||
path='./data/humaneval/human-eval-v2-20210705.jsonl',
|
||||
reader_cfg=humaneval_reader_cfg,
|
||||
infer_cfg=humaneval_infer_cfg,
|
||||
eval_cfg=humaneval_eval_cfg)
|
||||
|
@ -26,7 +26,7 @@ lambada_datasets = [
|
||||
dict(
|
||||
abbr='lambada',
|
||||
type=lambadaDataset,
|
||||
path='craffel/openai_lambada',
|
||||
path='./data/lambada/test.jsonl',
|
||||
reader_cfg=lambada_reader_cfg,
|
||||
infer_cfg=lambada_infer_cfg,
|
||||
eval_cfg=lambada_eval_cfg)
|
||||
|
@ -22,7 +22,7 @@ lambada_datasets = [
|
||||
dict(
|
||||
abbr='lambada',
|
||||
type=lambadaDataset,
|
||||
path='craffel/openai_lambada',
|
||||
path='./data/lambada/test.jsonl',
|
||||
reader_cfg=lambada_reader_cfg,
|
||||
infer_cfg=lambada_infer_cfg,
|
||||
eval_cfg=lambada_eval_cfg)
|
||||
|
@ -32,15 +32,12 @@ obqa_datasets = [
|
||||
dict(
|
||||
abbr="openbookqa",
|
||||
type=OBQADataset,
|
||||
path="openbookqa",
|
||||
split="test",
|
||||
path='./data/openbookqa/Main/test.jsonl',
|
||||
),
|
||||
dict(
|
||||
abbr="openbookqa_fact",
|
||||
type=OBQADataset,
|
||||
path="openbookqa",
|
||||
name="additional",
|
||||
split="test",
|
||||
path='./data/openbookqa/Additional/test_complete.jsonl',
|
||||
),
|
||||
]
|
||||
|
||||
|
@ -24,15 +24,12 @@ obqa_datasets = [
|
||||
dict(
|
||||
abbr="openbookqa",
|
||||
type=OBQADataset,
|
||||
path="openbookqa",
|
||||
split="test",
|
||||
path='./data/openbookqa/Main/test.jsonl',
|
||||
),
|
||||
dict(
|
||||
abbr="openbookqa_fact",
|
||||
type=OBQADataset,
|
||||
path="openbookqa",
|
||||
name="additional",
|
||||
split="test",
|
||||
path='./data/openbookqa/Additional/test_complete.jsonl',
|
||||
),
|
||||
]
|
||||
for _i in range(2):
|
||||
|
@ -33,9 +33,7 @@ obqa_datasets = [
|
||||
dict(
|
||||
abbr='openbookqa_fact',
|
||||
type=OBQADataset_V2,
|
||||
path='openbookqa',
|
||||
name='additional',
|
||||
split='test',
|
||||
path='./data/openbookqa/Additional/test_complete.jsonl',
|
||||
reader_cfg=obqa_reader_cfg,
|
||||
infer_cfg=obqa_infer_cfg,
|
||||
eval_cfg=obqa_eval_cfg,
|
||||
|
@ -37,16 +37,14 @@ _template = [
|
||||
|
||||
obqa_datasets = [
|
||||
dict(
|
||||
abbr="openbookqa",
|
||||
type=OBQADataset,
|
||||
path='openbookqa',
|
||||
split='test',
|
||||
path='./data/openbookqa/Main/test.jsonl',
|
||||
),
|
||||
dict(
|
||||
abbr='openbookqa_fact',
|
||||
type=OBQADataset,
|
||||
path='openbookqa',
|
||||
name='additional',
|
||||
split='test',
|
||||
path='./data/openbookqa/Additional/test_complete.jsonl',
|
||||
),
|
||||
]
|
||||
for _i in range(2):
|
||||
|
@ -34,7 +34,7 @@ piqa_datasets = [
|
||||
dict(
|
||||
abbr="piqa",
|
||||
type=piqaDataset_V2,
|
||||
path="piqa",
|
||||
path='./data/piqa',
|
||||
reader_cfg=piqa_reader_cfg,
|
||||
infer_cfg=piqa_infer_cfg,
|
||||
eval_cfg=piqa_eval_cfg)
|
||||
|
@ -30,7 +30,7 @@ piqa_datasets = [
|
||||
dict(
|
||||
abbr='piqa',
|
||||
type=piqaDataset_V3,
|
||||
path='piqa',
|
||||
path='./data/piqa',
|
||||
reader_cfg=piqa_reader_cfg,
|
||||
infer_cfg=piqa_infer_cfg,
|
||||
eval_cfg=piqa_eval_cfg)
|
||||
|
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import PPLInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import HFDataset
|
||||
from opencompass.datasets import piqaDataset
|
||||
|
||||
piqa_reader_cfg = dict(
|
||||
input_columns=['goal', 'sol1', 'sol2'],
|
||||
@ -23,8 +23,9 @@ piqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
|
||||
|
||||
piqa_datasets = [
|
||||
dict(
|
||||
type=HFDataset,
|
||||
path='piqa',
|
||||
abbr='piqa',
|
||||
type=piqaDataset,
|
||||
path='./data/piqa',
|
||||
reader_cfg=piqa_reader_cfg,
|
||||
infer_cfg=piqa_infer_cfg,
|
||||
eval_cfg=piqa_eval_cfg)
|
||||
|
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import PPLInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import HFDataset
|
||||
from opencompass.datasets import piqaDataset
|
||||
|
||||
piqa_reader_cfg = dict(
|
||||
input_columns=['goal', 'sol1', 'sol2'],
|
||||
@ -33,8 +33,9 @@ piqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
|
||||
|
||||
piqa_datasets = [
|
||||
dict(
|
||||
type=HFDataset,
|
||||
path='piqa',
|
||||
abbr='piqa',
|
||||
type=piqaDataset,
|
||||
path='./data/piqa',
|
||||
reader_cfg=piqa_reader_cfg,
|
||||
infer_cfg=piqa_infer_cfg,
|
||||
eval_cfg=piqa_eval_cfg)
|
||||
|
@ -7,7 +7,10 @@ from opencompass.utils.text_postprocessors import first_option_postprocess
|
||||
|
||||
race_reader_cfg = dict(
|
||||
input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
|
||||
output_column='answer')
|
||||
output_column='answer',
|
||||
train_split="validation",
|
||||
test_split="test"
|
||||
)
|
||||
|
||||
race_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
@ -29,17 +32,17 @@ race_eval_cfg = dict(
|
||||
|
||||
race_datasets = [
|
||||
dict(
|
||||
type=RaceDataset,
|
||||
abbr='race-middle',
|
||||
path='race',
|
||||
type=RaceDataset,
|
||||
path='./data/race',
|
||||
name='middle',
|
||||
reader_cfg=race_reader_cfg,
|
||||
infer_cfg=race_infer_cfg,
|
||||
eval_cfg=race_eval_cfg),
|
||||
dict(
|
||||
type=RaceDataset,
|
||||
abbr='race-high',
|
||||
path='race',
|
||||
type=RaceDataset,
|
||||
path='./data/race',
|
||||
name='high',
|
||||
reader_cfg=race_reader_cfg,
|
||||
infer_cfg=race_infer_cfg,
|
||||
|
@ -7,7 +7,10 @@ from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
race_reader_cfg = dict(
|
||||
input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
|
||||
output_column='answer')
|
||||
output_column='answer',
|
||||
train_split="validation",
|
||||
test_split="test"
|
||||
)
|
||||
|
||||
race_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
@ -23,17 +26,17 @@ race_eval_cfg = dict(
|
||||
|
||||
race_datasets = [
|
||||
dict(
|
||||
type=RaceDataset,
|
||||
abbr='race-middle',
|
||||
path='race',
|
||||
type=RaceDataset,
|
||||
path='./data/race',
|
||||
name='middle',
|
||||
reader_cfg=race_reader_cfg,
|
||||
infer_cfg=race_infer_cfg,
|
||||
eval_cfg=race_eval_cfg),
|
||||
dict(
|
||||
type=RaceDataset,
|
||||
abbr='race-high',
|
||||
path='race',
|
||||
type=RaceDataset,
|
||||
path='./data/race',
|
||||
name='high',
|
||||
reader_cfg=race_reader_cfg,
|
||||
infer_cfg=race_infer_cfg,
|
||||
|
@ -6,7 +6,10 @@ from opencompass.datasets import RaceDataset
|
||||
|
||||
race_reader_cfg = dict(
|
||||
input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
|
||||
output_column='answer')
|
||||
output_column='answer',
|
||||
train_split="validation",
|
||||
test_split="test"
|
||||
)
|
||||
|
||||
race_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
@ -27,17 +30,17 @@ race_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
|
||||
|
||||
race_datasets = [
|
||||
dict(
|
||||
type=RaceDataset,
|
||||
abbr='race-middle',
|
||||
path='race',
|
||||
type=RaceDataset,
|
||||
path='./data/race',
|
||||
name='middle',
|
||||
reader_cfg=race_reader_cfg,
|
||||
infer_cfg=race_infer_cfg,
|
||||
eval_cfg=race_eval_cfg),
|
||||
dict(
|
||||
type=RaceDataset,
|
||||
abbr='race-high',
|
||||
path='race',
|
||||
type=RaceDataset,
|
||||
path='./data/race',
|
||||
name='high',
|
||||
reader_cfg=race_reader_cfg,
|
||||
infer_cfg=race_infer_cfg,
|
||||
|
@ -6,7 +6,10 @@ from opencompass.datasets import RaceDataset
|
||||
|
||||
race_reader_cfg = dict(
|
||||
input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
|
||||
output_column='answer')
|
||||
output_column='answer',
|
||||
train_split="validation",
|
||||
test_split="test"
|
||||
)
|
||||
|
||||
race_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
@ -29,17 +32,17 @@ race_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
|
||||
|
||||
race_datasets = [
|
||||
dict(
|
||||
type=RaceDataset,
|
||||
abbr='race-middle',
|
||||
path='race',
|
||||
type=RaceDataset,
|
||||
path='./data/race',
|
||||
name='middle',
|
||||
reader_cfg=race_reader_cfg,
|
||||
infer_cfg=race_infer_cfg,
|
||||
eval_cfg=race_eval_cfg),
|
||||
dict(
|
||||
type=RaceDataset,
|
||||
abbr='race-high',
|
||||
path='race',
|
||||
type=RaceDataset,
|
||||
path='./data/race',
|
||||
name='high',
|
||||
reader_cfg=race_reader_cfg,
|
||||
infer_cfg=race_infer_cfg,
|
||||
|
@ -6,7 +6,10 @@ from opencompass.datasets import RaceDataset
|
||||
|
||||
race_reader_cfg = dict(
|
||||
input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
|
||||
output_column='answer')
|
||||
output_column='answer',
|
||||
train_split="validation",
|
||||
test_split="test"
|
||||
)
|
||||
|
||||
race_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
@ -28,17 +31,17 @@ race_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
|
||||
|
||||
race_datasets = [
|
||||
dict(
|
||||
type=RaceDataset,
|
||||
abbr='race-middle',
|
||||
path='race',
|
||||
type=RaceDataset,
|
||||
path='./data/race',
|
||||
name='middle',
|
||||
reader_cfg=race_reader_cfg,
|
||||
infer_cfg=race_infer_cfg,
|
||||
eval_cfg=race_eval_cfg),
|
||||
dict(
|
||||
type=RaceDataset,
|
||||
abbr='race-high',
|
||||
path='race',
|
||||
type=RaceDataset,
|
||||
path='./data/race',
|
||||
name='high',
|
||||
reader_cfg=race_reader_cfg,
|
||||
infer_cfg=race_infer_cfg,
|
||||
|
@ -34,7 +34,7 @@ siqa_datasets = [
|
||||
dict(
|
||||
abbr="siqa",
|
||||
type=siqaDataset_V2,
|
||||
path="social_i_qa",
|
||||
path='./data/siqa',
|
||||
reader_cfg=siqa_reader_cfg,
|
||||
infer_cfg=siqa_infer_cfg,
|
||||
eval_cfg=siqa_eval_cfg)
|
||||
|
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import PPLInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import HFDataset
|
||||
from opencompass.datasets import siqaDataset
|
||||
|
||||
siqa_reader_cfg = dict(
|
||||
input_columns=['context', 'question', 'answerA', 'answerB', 'answerC'],
|
||||
@ -25,8 +25,8 @@ siqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
|
||||
siqa_datasets = [
|
||||
dict(
|
||||
abbr="siqa",
|
||||
type=HFDataset,
|
||||
path='social_i_qa',
|
||||
type=siqaDataset,
|
||||
path='./data/siqa',
|
||||
reader_cfg=siqa_reader_cfg,
|
||||
infer_cfg=siqa_infer_cfg,
|
||||
eval_cfg=siqa_eval_cfg)
|
||||
|
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import PPLInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import HFDataset
|
||||
from opencompass.datasets import siqaDataset
|
||||
|
||||
siqa_reader_cfg = dict(
|
||||
input_columns=['context', 'question', 'answerA', 'answerB', 'answerC'],
|
||||
@ -25,9 +25,8 @@ siqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
|
||||
siqa_datasets = [
|
||||
dict(
|
||||
abbr="siqa",
|
||||
type=HFDataset,
|
||||
path='social_i_qa',
|
||||
name='social_i_qa',
|
||||
type=siqaDataset,
|
||||
path='./data/siqa',
|
||||
reader_cfg=siqa_reader_cfg,
|
||||
infer_cfg=siqa_infer_cfg,
|
||||
eval_cfg=siqa_eval_cfg)
|
||||
|
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import PPLInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import HFDataset
|
||||
from opencompass.datasets import siqaDataset
|
||||
|
||||
siqa_reader_cfg = dict(
|
||||
input_columns=['context', 'question', 'answerA', 'answerB', 'answerC'],
|
||||
@ -37,8 +37,8 @@ siqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
|
||||
siqa_datasets = [
|
||||
dict(
|
||||
abbr="siqa",
|
||||
type=HFDataset,
|
||||
path='social_i_qa',
|
||||
type=siqaDataset,
|
||||
path='./data/siqa',
|
||||
reader_cfg=siqa_reader_cfg,
|
||||
infer_cfg=siqa_infer_cfg,
|
||||
eval_cfg=siqa_eval_cfg)
|
||||
|
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import PPLInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import HFDataset
|
||||
from opencompass.datasets import siqaDataset
|
||||
|
||||
siqa_reader_cfg = dict(
|
||||
input_columns=['context', 'question', 'answerA', 'answerB', 'answerC'],
|
||||
@ -37,8 +37,8 @@ siqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
|
||||
siqa_datasets = [
|
||||
dict(
|
||||
abbr="siqa",
|
||||
type=HFDataset,
|
||||
path='social_i_qa',
|
||||
type=siqaDataset,
|
||||
path='./data/siqa',
|
||||
reader_cfg=siqa_reader_cfg,
|
||||
infer_cfg=siqa_infer_cfg,
|
||||
eval_cfg=siqa_eval_cfg)
|
||||
|
@ -37,8 +37,8 @@ storycloze_datasets = [
|
||||
dict(
|
||||
abbr="story_cloze",
|
||||
type=storyclozeDataset_V2,
|
||||
path="juletxara/xstory_cloze",
|
||||
name="en",
|
||||
path='./data/xstory_cloze',
|
||||
lang='en',
|
||||
reader_cfg=storycloze_reader_cfg,
|
||||
infer_cfg=storycloze_infer_cfg,
|
||||
eval_cfg=storycloze_eval_cfg,
|
||||
|
@ -31,8 +31,8 @@ storycloze_datasets = [
|
||||
dict(
|
||||
abbr='story_cloze',
|
||||
type=storyclozeDataset,
|
||||
path='juletxara/xstory_cloze',
|
||||
name='en',
|
||||
path='./data/xstory_cloze',
|
||||
lang='en',
|
||||
reader_cfg=storycloze_reader_cfg,
|
||||
infer_cfg=storycloze_infer_cfg,
|
||||
eval_cfg=storycloze_eval_cfg)
|
||||
|
@ -28,8 +28,8 @@ storycloze_datasets = [
|
||||
dict(
|
||||
abbr='story_cloze',
|
||||
type=storyclozeDataset,
|
||||
path='juletxara/xstory_cloze',
|
||||
name='en',
|
||||
path='./data/xstory_cloze',
|
||||
lang='en',
|
||||
reader_cfg=storycloze_reader_cfg,
|
||||
infer_cfg=storycloze_infer_cfg,
|
||||
eval_cfg=storycloze_eval_cfg)
|
||||
|
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import HFDataset, strategyqa_pred_postprocess, strategyqa_dataset_postprocess
|
||||
from opencompass.datasets import StrategyQADataset, strategyqa_pred_postprocess, strategyqa_dataset_postprocess
|
||||
|
||||
strategyqa_reader_cfg = dict(
|
||||
input_columns=['question'],
|
||||
@ -86,8 +86,8 @@ strategyqa_eval_cfg = dict(
|
||||
strategyqa_datasets = [
|
||||
dict(
|
||||
abbr='strategyqa',
|
||||
type=HFDataset,
|
||||
path='wics/strategy-qa',
|
||||
type=StrategyQADataset,
|
||||
path='./data/strategyqa/strategyQA_train.json',
|
||||
reader_cfg=strategyqa_reader_cfg,
|
||||
infer_cfg=strategyqa_infer_cfg,
|
||||
eval_cfg=strategyqa_eval_cfg)
|
||||
|
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import HFDataset, strategyqa_pred_postprocess, strategyqa_dataset_postprocess
|
||||
from opencompass.datasets import StrategyQADataset, strategyqa_pred_postprocess, strategyqa_dataset_postprocess
|
||||
|
||||
strategyqa_reader_cfg = dict(
|
||||
input_columns=['question'],
|
||||
@ -50,8 +50,8 @@ strategyqa_eval_cfg = dict(
|
||||
strategyqa_datasets = [
|
||||
dict(
|
||||
abbr='strategyqa',
|
||||
type=HFDataset,
|
||||
path='wics/strategy-qa',
|
||||
type=StrategyQADataset,
|
||||
path='./data/strategyqa/strategyQA_train.json',
|
||||
reader_cfg=strategyqa_reader_cfg,
|
||||
infer_cfg=strategyqa_infer_cfg,
|
||||
eval_cfg=strategyqa_eval_cfg)
|
||||
|
@ -6,9 +6,8 @@ from opencompass.datasets import TydiQADataset, TydiQAEvaluator
|
||||
# All configs are for TydiQA Goldp task
|
||||
tydiqa_reader_cfg = dict(
|
||||
input_columns=["passage_text", "question_text"],
|
||||
output_column="answer",
|
||||
test_split='validation',
|
||||
train_split='validation',)
|
||||
output_column="answer"
|
||||
)
|
||||
|
||||
langs = ['arabic', 'bengali', 'english', 'finnish', 'indonesian', 'japanese', 'korean', 'russian', 'swahili', 'telugu', 'thai']
|
||||
|
||||
@ -33,19 +32,25 @@ for _lang in langs:
|
||||
prompt_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=f"{_hint[0]}\n\n</E>{_hint[1]}{{passage_text}}\n{_hint[2]} {{question_text}}\n{_hint[3]} {{answer}}" ,
|
||||
ice_token='</E>'),
|
||||
ice_token='</E>'
|
||||
),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer), max_out_len=50)
|
||||
inferencer=dict(type=GenInferencer), max_out_len=50
|
||||
)
|
||||
|
||||
tydiqa_eval_cfg = dict(
|
||||
evaluator=dict(type=TydiQAEvaluator),
|
||||
ds_split='validation',
|
||||
ds_column='answer',
|
||||
)
|
||||
|
||||
tydiqa_eval_cfg = dict(evaluator=dict(type=TydiQAEvaluator),
|
||||
ds_split='validation',
|
||||
ds_column='answer',
|
||||
)
|
||||
tydiqa_datasets.append(
|
||||
dict(abbr=f'tyidqa-goldp_{_lang}',
|
||||
type=TydiQADataset,
|
||||
path='khalidalt/tydiqa-goldp',
|
||||
name=_lang,
|
||||
reader_cfg=tydiqa_reader_cfg,
|
||||
infer_cfg=tydiqa_infer_cfg,
|
||||
eval_cfg=tydiqa_eval_cfg))
|
||||
dict(abbr=f'tyidqa-goldp_{_lang}',
|
||||
type=TydiQADataset,
|
||||
path='./data/tydiqa',
|
||||
lang=_lang,
|
||||
reader_cfg=tydiqa_reader_cfg,
|
||||
infer_cfg=tydiqa_infer_cfg,
|
||||
eval_cfg=tydiqa_eval_cfg
|
||||
)
|
||||
)
|
||||
|
@ -7,8 +7,8 @@ from opencompass.utils.text_postprocessors import first_option_postprocess
|
||||
|
||||
winogrande_reader_cfg = dict(
|
||||
input_columns=["opt1", "opt2"],
|
||||
output_column="label",
|
||||
test_split="validation")
|
||||
output_column="answer",
|
||||
)
|
||||
|
||||
winogrande_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
@ -35,8 +35,7 @@ winogrande_datasets = [
|
||||
dict(
|
||||
abbr="winogrande",
|
||||
type=winograndeDataset_V2,
|
||||
path="winogrande",
|
||||
name="winogrande_xs",
|
||||
path='./data/winogrande',
|
||||
reader_cfg=winogrande_reader_cfg,
|
||||
infer_cfg=winogrande_infer_cfg,
|
||||
eval_cfg=winogrande_eval_cfg,
|
||||
|
@ -7,8 +7,7 @@ from opencompass.datasets import winograndeDataset
|
||||
winogrande_reader_cfg = dict(
|
||||
input_columns=['opt1', 'opt2'],
|
||||
output_column='answer',
|
||||
train_split='validation',
|
||||
test_split='validation')
|
||||
)
|
||||
|
||||
winogrande_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
@ -28,8 +27,7 @@ winogrande_datasets = [
|
||||
dict(
|
||||
abbr='winogrande',
|
||||
type=winograndeDataset,
|
||||
path='winogrande',
|
||||
name='winogrande_xs',
|
||||
path='./data/winogrande',
|
||||
reader_cfg=winogrande_reader_cfg,
|
||||
infer_cfg=winogrande_infer_cfg,
|
||||
eval_cfg=winogrande_eval_cfg)
|
||||
|
@ -7,8 +7,7 @@ from opencompass.datasets import winograndeDataset
|
||||
winogrande_reader_cfg = dict(
|
||||
input_columns=['opt1', 'opt2'],
|
||||
output_column='answer',
|
||||
train_split='validation',
|
||||
test_split='validation')
|
||||
)
|
||||
|
||||
winogrande_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
@ -26,8 +25,7 @@ winogrande_datasets = [
|
||||
dict(
|
||||
abbr='winogrande',
|
||||
type=winograndeDataset,
|
||||
path='winogrande',
|
||||
name='winogrande_xs',
|
||||
path='./data/winogrande',
|
||||
reader_cfg=winogrande_reader_cfg,
|
||||
infer_cfg=winogrande_infer_cfg,
|
||||
eval_cfg=winogrande_eval_cfg)
|
||||
|
@ -87,17 +87,6 @@ summarizer = dict(
|
||||
'eprstmt-dev',
|
||||
'lambada',
|
||||
'tnews-dev',
|
||||
'--------- 安全 Safety ---------', # category
|
||||
# '偏见', # subcategory
|
||||
'crows_pairs',
|
||||
# '有毒性(判别)', # subcategory
|
||||
'civil_comments',
|
||||
# '有毒性(判别)多语言', # subcategory
|
||||
'jigsaw_multilingual',
|
||||
# '有毒性(生成)', # subcategory
|
||||
'real-toxicity-prompts',
|
||||
# '真实性/有用性', # subcategory
|
||||
'truthful_qa',
|
||||
],
|
||||
summary_groups=sum(
|
||||
[v for k, v in locals().items() if k.endswith("_summary_groups")], []),
|
||||
|
@ -56,8 +56,6 @@ summarizer = dict(
|
||||
'openbookqa_fact',
|
||||
'nq',
|
||||
'triviaqa',
|
||||
'--- Security ---',
|
||||
'crows_pairs',
|
||||
],
|
||||
summary_groups=sum([v for k, v in locals().items() if k.endswith("_summary_groups")], []),
|
||||
)
|
||||
|
@ -66,10 +66,21 @@ Run the following commands to download and place the datasets in the `${OpenComp
|
||||
|
||||
```bash
|
||||
# Run in the OpenCompass directory
|
||||
wget https://github.com/open-compass/opencompass/releases/download/0.1.1/OpenCompassData.zip
|
||||
unzip OpenCompassData.zip
|
||||
wget https://github.com/open-compass/opencompass/releases/download/0.1.8.rc1/OpenCompassData-core-20231110.zip
|
||||
unzip OpenCompassData-core-20231110.zip
|
||||
```
|
||||
|
||||
If you need to use the more comprehensive dataset (~500M) provided by OpenCompass, you can download it using the following commands:
|
||||
|
||||
```bash
|
||||
wget https://github.com/open-compass/opencompass/releases/download/0.1.8.rc1/OpenCompassData-complete-20231110.zip
unzip OpenCompassData-complete-20231110.zip
cd ./data
# Quote the pattern so that unzip expands it itself. An unquoted *.zip is
# expanded by the shell, and unzip then treats every archive after the first
# one as a member name to extract from the first archive.
unzip '*.zip'
|
||||
```
|
||||
|
||||
The list of datasets included in both `.zip` files can be found [here](https://github.com/open-compass/opencompass/releases/tag/0.1.8.rc1).
|
||||
|
||||
OpenCompass supports most of the datasets commonly used for performance comparison; please refer to `configs/datasets` for the specific list of supported datasets.
|
||||
|
||||
As a next step, please read [Quick Start](./quick_start.md).
|
||||
|
@ -66,10 +66,21 @@ OpenCompass 支持的数据集主要包括两个部分:
|
||||
在 OpenCompass 项目根目录下运行下面命令,将数据集准备至 `${OpenCompass}/data` 目录下:
|
||||
|
||||
```bash
|
||||
wget https://github.com/open-compass/opencompass/releases/download/0.1.1/OpenCompassData.zip
|
||||
unzip OpenCompassData.zip
|
||||
wget https://github.com/open-compass/opencompass/releases/download/0.1.8.rc1/OpenCompassData-core-20231110.zip
|
||||
unzip OpenCompassData-core-20231110.zip
|
||||
```
|
||||
|
||||
如果需要使用 OpenCompass 提供的更加完整的数据集 (~500M),可以使用下述命令进行下载:
|
||||
|
||||
```bash
|
||||
wget https://github.com/open-compass/opencompass/releases/download/0.1.8.rc1/OpenCompassData-complete-20231110.zip
unzip OpenCompassData-complete-20231110.zip
cd ./data
# Quote the pattern so that unzip expands it itself. An unquoted *.zip is
# expanded by the shell, and unzip then treats every archive after the first
# one as a member name to extract from the first archive.
unzip '*.zip'
|
||||
```
|
||||
|
||||
两个 `.zip` 中所含数据集列表如[此处](https://github.com/open-compass/opencompass/releases/tag/0.1.8.rc1)所示。
|
||||
|
||||
OpenCompass 已经支持了大多数常用于性能比较的数据集,具体支持的数据集列表请直接在 `configs/datasets` 下进行查找。
|
||||
|
||||
接下来,你可以阅读[快速上手](./quick_start.md)了解 OpenCompass 的基本用法。
|
||||
|
@ -1,4 +1,7 @@
|
||||
from datasets import load_dataset
|
||||
import json
|
||||
import os
|
||||
|
||||
from datasets import Dataset, DatasetDict
|
||||
|
||||
from opencompass.registry import LOAD_DATASET
|
||||
|
||||
@ -9,14 +12,33 @@ from .base import BaseDataset
|
||||
class commonsenseqaDataset(BaseDataset):
    """CommonsenseQA loader that reads local ``*_rand_split.jsonl`` files."""

    @staticmethod
    def load(path):
        """Build the ``train`` and ``validation`` splits from a directory.

        Args:
            path: Directory containing ``train_rand_split.jsonl`` and
                ``dev_rand_split.jsonl`` (one JSON object per line).

        Returns:
            DatasetDict whose rows hold ``question``, the first five choice
            texts under ``A`` to ``E``, and ``answerKey``.
        """
        split_files = (
            ('train', 'train_rand_split.jsonl'),
            ('validation', 'dev_rand_split.jsonl'),
        )
        splits = {}
        for split_name, filename in split_files:
            jsonl_path = os.path.join(path, filename)
            rows = []
            with open(jsonl_path, 'r', encoding='utf-8') as fp:
                for raw in fp:
                    record = json.loads(raw)
                    question = record['question']
                    row = {'question': question['stem']}
                    choices = question['choices']
                    # Choices are positional: index 0 -> 'A', ..., 4 -> 'E'.
                    for idx, letter in enumerate('ABCDE'):
                        row[letter] = choices[idx]['text']
                    row['answerKey'] = record['answerKey']
                    rows.append(row)
            splits[split_name] = Dataset.from_list(rows)
        return DatasetDict(splits)
|
||||
|
@ -1,4 +1,6 @@
|
||||
from datasets import DatasetDict, load_dataset
|
||||
import json
|
||||
|
||||
from datasets import Dataset, DatasetDict
|
||||
|
||||
from opencompass.registry import LOAD_DATASET
|
||||
|
||||
@ -9,21 +11,37 @@ from .base import BaseDataset
|
||||
class dropDataset(BaseDataset):
    """DROP loader that reads a local JSON dump of the validation set."""

    @staticmethod
    def get_answers(validated_answers):
        """Collect the distinct gold answers of one question.

        Each validated answer contributes, in priority order: its ``number``
        when non-empty; otherwise its date rendered as ``"day month year"``
        when any date field is non-empty; otherwise every entry of ``spans``.

        Args:
            validated_answers: Iterable of answer dicts with the keys
                ``number``, ``date`` (holding ``day``/``month``/``year``)
                and ``spans``.

        Returns:
            List of unique answers in first-seen order.
        """
        answers = []
        for answer_item in validated_answers:
            if answer_item['number']:
                answers.append(answer_item['number'])
                continue
            date = answer_item['date']
            date_parts = [date[key] for key in ('day', 'month', 'year')]
            if any(date_parts):
                answers.append(' '.join(date_parts).strip())
            else:
                answers.extend(answer_item['spans'])
        # dict.fromkeys de-duplicates while keeping first-seen order. The
        # former list(set(...)) produced an order that changed between
        # interpreter runs because string hashing is randomized.
        return list(dict.fromkeys(answers))

    @staticmethod
    def load(path, only_number=True):
        """Load the question/answer pairs of every passage in the file.

        Args:
            path: JSON file mapping passage ids to objects with ``passage``
                and ``qa_pairs``.
            only_number: When true, keep only questions for which at least
                one validated answer has a non-empty ``number``.

        Returns:
            DatasetDict with a single ``validation`` split whose rows hold
            ``prompt`` (the passage), ``question`` and ``answers``.
        """
        with open(path, 'r', encoding='utf-8') as f:
            passages = json.load(f)

        dataset_list = []
        for passage in passages.values():
            for qa_pair in passage['qa_pairs']:
                validated_answers = qa_pair['validated_answers']
                if only_number and not any(
                        answer['number'] for answer in validated_answers):
                    continue
                dataset_list.append({
                    'prompt': passage['passage'],
                    'question': qa_pair['question'],
                    'answers': dropDataset.get_answers(validated_answers),
                })
        return DatasetDict({'validation': Dataset.from_list(dataset_list)})
|
||||
|
@ -1,6 +1,7 @@
|
||||
import os
|
||||
import re
|
||||
|
||||
from datasets import DatasetDict, load_dataset
|
||||
from datasets import Dataset, DatasetDict
|
||||
|
||||
from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS
|
||||
|
||||
@ -11,15 +12,30 @@ from .base import BaseDataset
|
||||
class FloresFirst100Dataset(BaseDataset):
    """Flores loader that reads parallel sentence files from a local folder.

    NOTE(review): nothing in this class truncates ``devtest`` to 100 rows;
    presumably the local files are already trimmed. Confirm against the data.
    """

    @staticmethod
    def load_single(src_path, tgt_path, src_lang, tgt_lang):
        """Pair two line-aligned text files into one ``Dataset``.

        Args:
            src_path: Text file with one source sentence per line.
            tgt_path: Text file with one target sentence per line.
            src_lang: Suffix of the source column name ``sentence_<lang>``.
            tgt_lang: Suffix of the target column name ``sentence_<lang>``.

        Returns:
            Dataset with the whitespace-stripped sentences in the columns
            ``sentence_{src_lang}`` and ``sentence_{tgt_lang}``.

        Raises:
            AssertionError: If the two files differ in line count.
        """
        with open(src_path, 'r', encoding='utf-8') as src_file:
            src_sentences = src_file.readlines()
        with open(tgt_path, 'r', encoding='utf-8') as tgt_file:
            tgt_sentences = tgt_file.readlines()
        assert len(src_sentences) == len(tgt_sentences)

        rows = []
        for idx, src_sentence in enumerate(src_sentences):
            rows.append({
                f'sentence_{src_lang}': src_sentence.strip(),
                f'sentence_{tgt_lang}': tgt_sentences[idx].strip(),
            })
        return Dataset.from_list(rows)

    @staticmethod
    def load(path, name):
        """Load the ``dev`` and ``devtest`` splits of one language pair.

        Args:
            path: Root directory containing ``dev/`` and ``devtest/``.
            name: Language pair written as ``<src>-<tgt>``.

        Returns:
            DatasetDict with the splits ``dev`` and ``devtest``.
        """
        src_lang, tgt_lang = name.split('-')
        splits = {}
        # Files are named <lang>.<split> inside the folder of that split.
        for split in ('dev', 'devtest'):
            splits[split] = FloresFirst100Dataset.load_single(
                os.path.join(path, split, f'{src_lang}.{split}'),
                os.path.join(path, split, f'{tgt_lang}.{split}'),
                src_lang, tgt_lang)
        return DatasetDict(splits)
|
||||
|
||||
|
||||
@TEXT_POSTPROCESSORS.register_module('flores')
|
||||
|
@ -1,5 +1,30 @@
|
||||
import json
|
||||
import os
|
||||
|
||||
from datasets import Dataset, DatasetDict
|
||||
|
||||
from opencompass.openicl import BaseEvaluator
|
||||
from opencompass.registry import TEXT_POSTPROCESSORS
|
||||
from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS
|
||||
|
||||
from .base import BaseDataset
|
||||
|
||||
|
||||
@LOAD_DATASET.register_module()
class GSM8KDataset(BaseDataset):
    """GSM8K loader that reads ``train.jsonl`` and ``test.jsonl`` locally."""

    @staticmethod
    def load(path):
        """Load both splits from a directory of JSONL files.

        Args:
            path: Directory containing ``train.jsonl`` and ``test.jsonl``
                (one JSON object per line).

        Returns:
            DatasetDict with the splits ``train`` and ``test``; each row is
            the parsed JSON object of one line, unchanged.
        """
        datasets = {}
        for split in ['train', 'test']:
            split_path = os.path.join(path, split + '.jsonl')
            with open(split_path, 'r', encoding='utf-8') as f:
                # The former bare `line['answer']` expression had no effect
                # on the stored record and has been dropped.
                records = [json.loads(line.strip()) for line in f]
            datasets[split] = Dataset.from_list(records)
        return DatasetDict(datasets)
|
||||
|
||||
|
||||
@TEXT_POSTPROCESSORS.register_module('gsm8k_dataset')
|
||||
|
@ -1,6 +1,6 @@
|
||||
import json
|
||||
|
||||
from datasets import Dataset, load_dataset
|
||||
from datasets import Dataset
|
||||
|
||||
from opencompass.registry import LOAD_DATASET
|
||||
|
||||
@ -11,15 +11,20 @@ from .base import BaseDataset
|
||||
class hellaswagDataset(BaseDataset):
    """HellaSwag loader backed by a local JSONL file."""

    @staticmethod
    def load(path):
        """Read the file and flatten each record into one row.

        Args:
            path: JSONL file whose objects carry ``query``, ``choices`` and
                ``gold``.

        Returns:
            Dataset with ``ctx``, the first four choices under ``A`` to
            ``D``, and ``label`` (the ``gold`` value as stored).
        """
        rows = []
        with open(path, 'r', encoding='utf-8') as fp:
            for raw in fp:
                record = json.loads(raw)
                # At most two splits on ': '; the last piece is kept, which
                # is the whole query when the separator never occurs.
                row = {'ctx': record['query'].split(': ', 2)[-1]}
                choices = record['choices']
                for idx, letter in enumerate('ABCD'):
                    row[letter] = choices[idx]
                row['label'] = record['gold']
                rows.append(row)
        return Dataset.from_list(rows)
|
||||
|
||||
|
||||
@ -27,19 +32,20 @@ class hellaswagDataset(BaseDataset):
|
||||
class hellaswagDataset_V2(BaseDataset):
    """HellaSwag loader (letter-label variant) backed by a local JSONL file."""

    @staticmethod
    def load(path):
        """Read the file and flatten each record into one row.

        Args:
            path: JSONL file whose objects carry ``query``, ``choices`` and
                an integer ``gold`` index.

        Returns:
            Dataset with ``ctx``, the first four choices under ``A`` to
            ``D``, and ``label`` as the letter selected by ``gold``.
        """
        rows = []
        with open(path, 'r', encoding='utf-8') as fp:
            for raw in fp:
                record = json.loads(raw)
                # One split on ': '; the last piece is kept, which is the
                # whole query when the separator never occurs.
                row = {'ctx': record['query'].split(': ', 1)[-1]}
                choices = record['choices']
                for idx, letter in enumerate('ABCD'):
                    row[letter] = choices[idx]
                row['label'] = 'ABCD'[record['gold']]
                rows.append(row)
        return Dataset.from_list(rows)
|
||||
|
||||
|
||||
|
@ -1,9 +1,27 @@
|
||||
import json
|
||||
import os.path as osp
|
||||
import re
|
||||
import tempfile
|
||||
from typing import List
|
||||
|
||||
from datasets import Dataset
|
||||
|
||||
from opencompass.openicl.icl_evaluator import BaseEvaluator
|
||||
from opencompass.registry import LOAD_DATASET
|
||||
|
||||
from .base import BaseDataset
|
||||
|
||||
|
||||
@LOAD_DATASET.register_module()
class HumanevalDataset(BaseDataset):
    """HumanEval loader backed by a local JSONL file."""

    @staticmethod
    def load(path):
        """Parse every line of the file into one dataset row.

        Args:
            path: JSONL file with one JSON object per line.

        Returns:
            Dataset whose rows are the parsed objects, unchanged.
        """
        with open(path, 'r', encoding='utf-8') as fp:
            problems = [json.loads(raw.strip()) for raw in fp]
        return Dataset.from_list(problems)
|
||||
|
||||
|
||||
class HumanEvaluator(BaseEvaluator):
|
||||
|
@ -1,7 +1,8 @@
|
||||
import json
|
||||
import re
|
||||
import string
|
||||
|
||||
from datasets import DatasetDict, load_dataset
|
||||
from datasets import Dataset, DatasetDict
|
||||
|
||||
from opencompass.openicl.icl_evaluator import BaseEvaluator
|
||||
from opencompass.registry import ICL_EVALUATORS, LOAD_DATASET
|
||||
@ -14,16 +15,12 @@ from .base import BaseDataset
|
||||
class lambadaDataset(BaseDataset):
    """LAMBADA loader backed by a local JSONL file."""

    @staticmethod
    def load(path):
        """Parse every line of the file into one row of the test split.

        Rows are stored exactly as parsed; no field is derived here.

        Args:
            path: JSONL file with one JSON object per line.

        Returns:
            DatasetDict with a single ``test`` split.
        """
        with open(path, 'r', encoding='utf-8') as fp:
            rows = [json.loads(raw) for raw in fp]
        return DatasetDict({'test': Dataset.from_list(rows)})
|
||||
|
||||
|
||||
|
@ -1,4 +1,6 @@
|
||||
from datasets import load_dataset
|
||||
import json
|
||||
|
||||
from datasets import Dataset
|
||||
|
||||
from opencompass.registry import LOAD_DATASET
|
||||
|
||||
@ -9,33 +11,46 @@ from .base import BaseDataset
|
||||
class OBQADataset(BaseDataset):
    """OpenBookQA loader backed by a local JSONL file."""

    @staticmethod
    def load(path):
        """Read the file and flatten each record into one row.

        Args:
            path: JSONL file whose objects carry ``question`` (with ``stem``
                and ``choices``), ``answerKey`` and optionally ``fact1``.

        Returns:
            Dataset with the first four choice texts under ``A`` to ``D``,
            ``question_stem``, ``answerKey`` and, when present in the record,
            ``fact1``.
        """
        dataset_list = []
        # Decode explicitly as UTF-8: without `encoding`, open() uses the
        # platform locale and can mis-decode or reject non-ASCII text.
        with open(path, 'r', encoding='utf-8') as f:
            for line in f:
                record = json.loads(line)
                choices = record['question']['choices']
                item = {
                    letter: choices[idx]['text']
                    for idx, letter in enumerate('ABCD')
                }
                item['question_stem'] = record['question']['stem']
                item['answerKey'] = record['answerKey']
                if 'fact1' in record:
                    item['fact1'] = record['fact1']
                dataset_list.append(item)
        return Dataset.from_list(dataset_list)
|
||||
|
||||
|
||||
@LOAD_DATASET.register_module()
class OBQADataset_V2(BaseDataset):
    """OpenBookQA loader that turns every stem into a question."""

    @staticmethod
    def load(path):
        """Read the file and flatten each record into one row.

        A stem that does not already end with ``?`` gets `` what?`` appended.

        Args:
            path: JSONL file whose objects carry ``question`` (with ``stem``
                and ``choices``), ``answerKey`` and optionally ``fact1``.

        Returns:
            Dataset with the first four choice texts under ``A`` to ``D``,
            ``question_stem``, ``answerKey`` and, when present in the record,
            ``fact1``.
        """
        dataset_list = []
        # Decode explicitly as UTF-8: without `encoding`, open() uses the
        # platform locale and can mis-decode or reject non-ASCII text.
        with open(path, 'r', encoding='utf-8') as f:
            for line in f:
                record = json.loads(line)
                question = record['question']['stem']
                if not question.endswith('?'):
                    question += ' what?'
                choices = record['question']['choices']
                item = {
                    letter: choices[idx]['text']
                    for idx, letter in enumerate('ABCD')
                }
                item['question_stem'] = question
                item['answerKey'] = record['answerKey']
                if 'fact1' in record:
                    item['fact1'] = record['fact1']
                dataset_list.append(item)
        return Dataset.from_list(dataset_list)
|
||||
|
@ -1,50 +1,108 @@
|
||||
from datasets import load_dataset
|
||||
import json
|
||||
import os
|
||||
|
||||
from datasets import Dataset, DatasetDict
|
||||
|
||||
from opencompass.registry import LOAD_DATASET
|
||||
|
||||
from .base import BaseDataset
|
||||
|
||||
|
||||
@LOAD_DATASET.register_module()
class piqaDataset(BaseDataset):
    """PIQA loader that joins local ``.jsonl`` examples with ``.lst`` labels."""

    @staticmethod
    def load_single(path, data_filename, label_filename):
        """Read one split: examples from a JSONL file, labels from a list.

        Args:
            path: Directory holding both files.
            data_filename: JSONL file with one example object per line.
            label_filename: Text file with one integer label per line, in the
                same order as the examples.

        Returns:
            Dataset of the parsed examples, each extended with an integer
            ``label`` field.

        Raises:
            AssertionError: If the two files differ in line count.
        """
        examples_file = os.path.join(path, data_filename)
        labels_file = os.path.join(path, label_filename)
        with open(examples_file, 'r', encoding='utf-8') as fp:
            raw_examples = fp.readlines()
        with open(labels_file, 'r', encoding='utf-8') as fp:
            raw_labels = fp.readlines()
        assert len(raw_examples) == len(raw_labels)

        rows = []
        for raw_example, raw_label in zip(raw_examples, raw_labels):
            row = json.loads(raw_example.strip())
            row['label'] = int(raw_label.strip())
            rows.append(row)
        return Dataset.from_list(rows)

    @staticmethod
    def load(path):
        """Load the ``train`` and ``validation`` splits from a directory.

        Args:
            path: Directory containing ``train.jsonl``, ``train-labels.lst``,
                ``dev.jsonl`` and ``dev-labels.lst``.

        Returns:
            DatasetDict with the splits ``train`` and ``validation``.
        """
        split_files = {
            'train': ('train.jsonl', 'train-labels.lst'),
            'validation': ('dev.jsonl', 'dev-labels.lst'),
        }
        return DatasetDict({
            split: piqaDataset.load_single(path, data_file, label_file)
            for split, (data_file, label_file) in split_files.items()
        })
|
||||
|
||||
|
||||
@LOAD_DATASET.register_module()
class piqaDataset_V2(BaseDataset):
    """PIQA loader that exposes the label as a letter in ``answer``."""

    @staticmethod
    def load_single(path, data_filename, label_filename):
        """Read one split: examples from a JSONL file, labels from a list.

        Args:
            path: Directory holding both files.
            data_filename: JSONL file with one example object per line.
            label_filename: Text file with one integer label per line, in the
                same order as the examples.

        Returns:
            Dataset of the parsed examples, each extended with ``answer``:
            ``'NULL'`` for a negative label, otherwise the letter of ``'AB'``
            at the label index.

        Raises:
            AssertionError: If the two files differ in line count.
        """
        examples_file = os.path.join(path, data_filename)
        labels_file = os.path.join(path, label_filename)
        with open(examples_file, 'r', encoding='utf-8') as fp:
            raw_examples = fp.readlines()
        with open(labels_file, 'r', encoding='utf-8') as fp:
            raw_labels = fp.readlines()
        assert len(raw_examples) == len(raw_labels)

        rows = []
        for raw_example, raw_label in zip(raw_examples, raw_labels):
            row = json.loads(raw_example.strip())
            label_idx = int(raw_label.strip())
            row['answer'] = 'NULL' if label_idx < 0 else 'AB'[label_idx]
            rows.append(row)
        return Dataset.from_list(rows)

    @staticmethod
    def load(path):
        """Load the ``train`` and ``validation`` splits from a directory.

        Args:
            path: Directory containing ``train.jsonl``, ``train-labels.lst``,
                ``dev.jsonl`` and ``dev-labels.lst``.

        Returns:
            DatasetDict with the splits ``train`` and ``validation``.
        """
        split_files = {
            'train': ('train.jsonl', 'train-labels.lst'),
            'validation': ('dev.jsonl', 'dev-labels.lst'),
        }
        return DatasetDict({
            split: piqaDataset_V2.load_single(path, data_file, label_file)
            for split, (data_file, label_file) in split_files.items()
        })
|
||||
|
||||
|
||||
@LOAD_DATASET.register_module()
class piqaDataset_V3(BaseDataset):
    """PIQA loader that normalizes the casing of goal and solutions."""

    @staticmethod
    def load_single(path, data_filename, label_filename):
        """Read one split and re-case the first letter of each text field.

        The goal always starts upper-case. Both solutions start upper-case
        when the goal ends with ``?`` or ``.``, and lower-case otherwise.
        Empty strings are left as they are.

        Args:
            path: Directory holding both files.
            data_filename: JSONL file with one example object per line, each
                carrying ``goal``, ``sol1`` and ``sol2``.
            label_filename: Text file with one integer label per line, in the
                same order as the examples.

        Returns:
            Dataset of the re-cased examples, each extended with an integer
            ``label`` field.

        Raises:
            AssertionError: If the two files differ in line count.
        """
        data_path = os.path.join(path, data_filename)
        label_path = os.path.join(path, label_filename)
        with open(data_path, 'r', encoding='utf-8') as f:
            data_lines = f.readlines()
        with open(label_path, 'r', encoding='utf-8') as f:
            label_lines = f.readlines()
        assert len(data_lines) == len(label_lines)

        dataset = []
        for data, label in zip(data_lines, label_lines):
            row = json.loads(data.strip())
            row['label'] = int(label.strip())
            # Slicing with [:1] instead of indexing [0] gives the same result
            # for non-empty text and avoids IndexError on an empty string.
            goal = row['goal'][:1].upper() + row['goal'][1:]
            row['goal'] = goal
            recase = str.upper if goal.endswith(('?', '.')) else str.lower
            for key in ('sol1', 'sol2'):
                row[key] = recase(row[key][:1]) + row[key][1:]
            dataset.append(row)
        return Dataset.from_list(dataset)

    @staticmethod
    def load(path):
        """Load the ``train`` and ``validation`` splits from a directory.

        Args:
            path: Directory containing ``train.jsonl``, ``train-labels.lst``,
                ``dev.jsonl`` and ``dev-labels.lst``.

        Returns:
            DatasetDict with the splits ``train`` and ``validation``.
        """
        train_dataset = piqaDataset_V3.load_single(path, 'train.jsonl',
                                                   'train-labels.lst')
        val_dataset = piqaDataset_V3.load_single(path, 'dev.jsonl',
                                                 'dev-labels.lst')
        return DatasetDict({'train': train_dataset, 'validation': val_dataset})
|
||||
|
@ -1,4 +1,7 @@
|
||||
from datasets import load_dataset
|
||||
import json
|
||||
import os
|
||||
|
||||
from datasets import Dataset, DatasetDict
|
||||
|
||||
from opencompass.registry import LOAD_DATASET
|
||||
|
||||
@ -10,12 +13,21 @@ class RaceDataset(BaseDataset):
|
||||
|
||||
@staticmethod
def load(path: str, name: str):
    """Load the ``validation`` and ``test`` splits of one RACE subset.

    Args:
        path: Root directory containing ``validation/`` and ``test/``.
        name: Subset name; each split is read from ``<split>/<name>.jsonl``.

    Returns:
        DatasetDict whose rows hold ``article``, ``question``, the first
        four options under ``A`` to ``D``, and ``answer``.
    """
    splits = {}
    for split in ('validation', 'test'):
        jsonl_path = os.path.join(path, split, f'{name}.jsonl')
        rows = []
        with open(jsonl_path, 'r', encoding='utf-8') as fp:
            for raw in fp:
                record = json.loads(raw)
                row = {
                    'article': record['article'],
                    'question': record['question'],
                }
                options = record['options']
                # Options are positional: index 0 -> 'A', ..., 3 -> 'D'.
                for idx, letter in enumerate('ABCD'):
                    row[letter] = options[idx]
                row['answer'] = record['answer']
                rows.append(row)
        splits[split] = Dataset.from_list(rows)
    return DatasetDict(splits)
|
||||
|
@ -1,4 +1,7 @@
|
||||
from datasets import load_dataset
|
||||
import json
|
||||
import os
|
||||
|
||||
from datasets import Dataset, DatasetDict
|
||||
|
||||
from opencompass.registry import LOAD_DATASET
|
||||
|
||||
@ -6,24 +9,72 @@ from .base import BaseDataset
|
||||
|
||||
|
||||
@LOAD_DATASET.register_module()
|
||||
class siqaDataset_V2(BaseDataset):
|
||||
class siqaDataset(BaseDataset):
|
||||
"""Disconnect from HuggingFace version of HFDataset."""
|
||||
|
||||
@staticmethod
|
||||
def load(**kwargs):
|
||||
dataset = load_dataset(**kwargs)
|
||||
def load_single(path, data_filename, label_filename):
|
||||
data_path = os.path.join(path, data_filename)
|
||||
label_path = os.path.join(path, label_filename)
|
||||
dataset = []
|
||||
with open(data_path, 'r', encoding='utf-8') as f:
|
||||
data_lines = f.readlines()
|
||||
with open(label_path, 'r', encoding='utf-8') as f:
|
||||
label_lines = f.readlines()
|
||||
assert len(data_lines) == len(label_lines)
|
||||
for data, label in zip(data_lines, label_lines):
|
||||
i = json.loads(data.strip())
|
||||
i['label'] = int(label.strip())
|
||||
dataset.append(i)
|
||||
|
||||
def preprocess(example):
|
||||
example['all_labels'] = {
|
||||
return Dataset.from_list(dataset)
|
||||
|
||||
@staticmethod
|
||||
def load(path):
|
||||
train_dataset = siqaDataset.load_single(path, 'train.jsonl',
|
||||
'train-labels.lst')
|
||||
val_dataset = siqaDataset.load_single(path, 'dev.jsonl',
|
||||
'dev-labels.lst')
|
||||
return DatasetDict({'train': train_dataset, 'validation': val_dataset})
|
||||
|
||||
|
||||
@LOAD_DATASET.register_module()
|
||||
class siqaDataset_V2(BaseDataset):
|
||||
"""Disconnect from HuggingFace version of siqaDataset_V2."""
|
||||
|
||||
@staticmethod
|
||||
def load_single(path, data_filename, label_filename):
|
||||
data_path = os.path.join(path, data_filename)
|
||||
label_path = os.path.join(path, label_filename)
|
||||
dataset = []
|
||||
with open(data_path, 'r', encoding='utf-8') as f:
|
||||
data_lines = f.readlines()
|
||||
with open(label_path, 'r', encoding='utf-8') as f:
|
||||
label_lines = f.readlines()
|
||||
assert len(data_lines) == len(label_lines)
|
||||
for data, label in zip(data_lines, label_lines):
|
||||
i = json.loads(data.strip())
|
||||
label = int(label.strip())
|
||||
# some preprocessing
|
||||
i['all_labels'] = {
|
||||
'candidates': [
|
||||
f'A. {example["answerA"]}',
|
||||
f'B. {example["answerB"]}',
|
||||
f'C. {example["answerC"]}',
|
||||
[f'A. {i["answerA"]}', 'A', i['answerA']],
|
||||
[f'B. {i["answerB"]}', 'B', i['answerB']],
|
||||
[f'C. {i["answerC"]}', 'C', i['answerC']],
|
||||
],
|
||||
'label':
|
||||
int(example['label']) - 1
|
||||
label - 1
|
||||
}
|
||||
example['label'] = ' ABC'[int(example['label'])]
|
||||
return example
|
||||
i['label'] = ' ABC'[label]
|
||||
|
||||
dataset = dataset.map(preprocess)
|
||||
return dataset
|
||||
dataset.append(i)
|
||||
|
||||
return Dataset.from_list(dataset)
|
||||
|
||||
@staticmethod
|
||||
def load(path):
|
||||
train_dataset = siqaDataset_V2.load_single(path, 'train.jsonl',
|
||||
'train-labels.lst')
|
||||
val_dataset = siqaDataset_V2.load_single(path, 'dev.jsonl',
|
||||
'dev-labels.lst')
|
||||
return DatasetDict({'train': train_dataset, 'validation': val_dataset})
|
||||
|
@ -1,4 +1,7 @@
|
||||
from datasets import DatasetDict, load_dataset
|
||||
import json
|
||||
import os
|
||||
|
||||
from datasets import Dataset, DatasetDict
|
||||
|
||||
from opencompass.registry import LOAD_DATASET
|
||||
|
||||
@ -9,38 +12,39 @@ from .base import BaseDataset
|
||||
class storyclozeDataset(BaseDataset):
|
||||
|
||||
@staticmethod
|
||||
def load(**kwargs):
|
||||
# special process
|
||||
dataset = load_dataset(**kwargs, split='train+eval')
|
||||
|
||||
def preprocess(example):
|
||||
example['context'] = ' '.join([
|
||||
example['input_sentence_1'], example['input_sentence_2'],
|
||||
example['input_sentence_3'], example['input_sentence_4']
|
||||
])
|
||||
return example
|
||||
|
||||
dataset = dataset.map(preprocess)
|
||||
|
||||
return DatasetDict({'test': dataset})
|
||||
def load(path, lang):
|
||||
dataset_list = []
|
||||
for split in ['train', 'eval']:
|
||||
split_path = os.path.join(path, f'{lang}_{split}.jsonl')
|
||||
with open(split_path, 'r', encoding='utf-8') as f:
|
||||
for line in f:
|
||||
line = json.loads(line)
|
||||
line['context'] = ' '.join([
|
||||
line['input_sentence_1'], line['input_sentence_2'],
|
||||
line['input_sentence_3'], line['input_sentence_4']
|
||||
])
|
||||
dataset_list.append(line)
|
||||
dataset_list = Dataset.from_list(dataset_list)
|
||||
return DatasetDict({'test': dataset_list})
|
||||
|
||||
|
||||
@LOAD_DATASET.register_module()
|
||||
class storyclozeDataset_V2(BaseDataset):
|
||||
|
||||
@staticmethod
|
||||
def load(**kwargs):
|
||||
# special process
|
||||
dataset = load_dataset(**kwargs, split='train+eval')
|
||||
|
||||
def preprocess(example):
|
||||
example['context'] = ' '.join([
|
||||
example['input_sentence_1'], example['input_sentence_2'],
|
||||
example['input_sentence_3'], example['input_sentence_4']
|
||||
])
|
||||
example['answer_right_ending'] = ' AB'[
|
||||
example['answer_right_ending']]
|
||||
return example
|
||||
|
||||
dataset = dataset.map(preprocess)
|
||||
return dataset
|
||||
def load(path, lang):
|
||||
dataset_list = []
|
||||
for split in ['train', 'eval']:
|
||||
split_path = os.path.join(path, f'{lang}_{split}.jsonl')
|
||||
with open(split_path, 'r', encoding='utf-8') as f:
|
||||
for line in f:
|
||||
line = json.loads(line)
|
||||
line['context'] = ' '.join([
|
||||
line['input_sentence_1'], line['input_sentence_2'],
|
||||
line['input_sentence_3'], line['input_sentence_4']
|
||||
])
|
||||
line['answer_right_ending'] = ' AB'[
|
||||
line['answer_right_ending']]
|
||||
dataset_list.append(line)
|
||||
dataset_list = Dataset.from_list(dataset_list)
|
||||
return dataset_list
|
||||
|
@ -1,6 +1,11 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
from opencompass.registry import TEXT_POSTPROCESSORS
|
||||
from datasets import Dataset
|
||||
|
||||
from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS
|
||||
|
||||
from .base import BaseDataset
|
||||
|
||||
|
||||
@TEXT_POSTPROCESSORS.register_module('strategyqa')
|
||||
@ -16,3 +21,13 @@ def strategyqa_pred_postprocess(text: str) -> str:
|
||||
@TEXT_POSTPROCESSORS.register_module('strategyqa_dataset')
|
||||
def strategyqa_dataset_postprocess(text: str) -> str:
|
||||
return 'yes' if str(text) == 'True' else 'no'
|
||||
|
||||
|
||||
@LOAD_DATASET.register_module()
|
||||
class StrategyQADataset(BaseDataset):
|
||||
|
||||
@staticmethod
|
||||
def load(path):
|
||||
with open(path, 'r', encoding='utf-8') as f:
|
||||
dataset = json.load(f)
|
||||
return Dataset.from_list(dataset)
|
||||
|
@ -1,7 +1,9 @@
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from collections import Counter
|
||||
|
||||
from datasets import load_dataset
|
||||
from datasets import Dataset
|
||||
|
||||
from opencompass.openicl.icl_evaluator import BaseEvaluator
|
||||
from opencompass.utils.text_postprocessors import general_postprocess
|
||||
@ -12,15 +14,16 @@ from .base import BaseDataset
|
||||
class TydiQADataset(BaseDataset):
|
||||
|
||||
@staticmethod
|
||||
def load(**kwargs):
|
||||
dataset = load_dataset(**kwargs)
|
||||
|
||||
def pre_process(example):
|
||||
example['answer'] = example['answers']['text']
|
||||
return example
|
||||
|
||||
dataset = dataset.map(pre_process).remove_columns(['id', 'answers'])
|
||||
return dataset
|
||||
def load(path, lang):
|
||||
path = os.path.join(path, 'dev', f'{lang}-dev.jsonl')
|
||||
dataset_list = []
|
||||
with open(path, 'r', encoding='utf-8') as f:
|
||||
for line in f:
|
||||
line = json.loads(line)
|
||||
answer = list(set([i['text'] for i in line['answers']]))
|
||||
line['answer'] = answer
|
||||
dataset_list.append(line)
|
||||
return Dataset.from_list(dataset_list)
|
||||
|
||||
|
||||
class TydiQAEvaluator(BaseEvaluator):
|
||||
|
@ -1,4 +1,7 @@
|
||||
from datasets import load_dataset
|
||||
import json
|
||||
import os
|
||||
|
||||
from datasets import Dataset
|
||||
|
||||
from opencompass.registry import LOAD_DATASET
|
||||
|
||||
@ -7,38 +10,49 @@ from .base import BaseDataset
|
||||
|
||||
@LOAD_DATASET.register_module()
|
||||
class winograndeDataset(BaseDataset):
|
||||
"""Disconnect from Huggingface, winograndeDataset."""
|
||||
|
||||
@staticmethod
|
||||
def load(**kwargs):
|
||||
|
||||
dataset = load_dataset(**kwargs)
|
||||
|
||||
def preprocess(example):
|
||||
prompt = example.pop('sentence')
|
||||
example['opt1'] = prompt.replace('_', example.pop('option1'))
|
||||
example['opt2'] = prompt.replace('_', example.pop('option2'))
|
||||
return example
|
||||
|
||||
return dataset.map(preprocess)
|
||||
def load(path):
|
||||
path = os.path.join(path, 'dev.jsonl')
|
||||
dataset_list = []
|
||||
with open(path, 'r', encoding='utf-8') as f:
|
||||
for line in f:
|
||||
line = json.loads(line)
|
||||
prompt = line['sentence']
|
||||
dataset_list.append({
|
||||
'opt1':
|
||||
prompt.replace('_', line['option1']),
|
||||
'opt2':
|
||||
prompt.replace('_', line['option2']),
|
||||
'answer':
|
||||
line['answer']
|
||||
})
|
||||
dataset_list = Dataset.from_list(dataset_list)
|
||||
return dataset_list
|
||||
|
||||
|
||||
@LOAD_DATASET.register_module()
|
||||
class winograndeDataset_V2(BaseDataset):
|
||||
"""Disconnect from Huggingface, winograndeDataset_V2."""
|
||||
|
||||
@staticmethod
|
||||
def load(**kwargs):
|
||||
|
||||
dataset = load_dataset(**kwargs)
|
||||
|
||||
def preprocess(example):
|
||||
prompt = example.pop('sentence')
|
||||
example['opt1'] = prompt.replace('_', example.pop('option1'))
|
||||
example['opt2'] = prompt.replace('_', example.pop('option2'))
|
||||
answer = example.pop('answer')
|
||||
if answer == '':
|
||||
example['label'] = 'NULL'
|
||||
else:
|
||||
example['label'] = ' AB'[int(answer)]
|
||||
return example
|
||||
|
||||
return dataset.map(preprocess)
|
||||
def load(path):
|
||||
path = os.path.join(path, 'dev.jsonl')
|
||||
dataset_list = []
|
||||
with open(path, 'r', encoding='utf-8') as f:
|
||||
for line in f:
|
||||
line = json.loads(line)
|
||||
prompt = line['sentence']
|
||||
answer = line['answer']
|
||||
answer = ' AB'[int(answer)] if answer != '' else 'NULL'
|
||||
dataset_list.append({
|
||||
'opt1':
|
||||
prompt.replace('_', line['option1']),
|
||||
'opt2':
|
||||
prompt.replace('_', line['option2']),
|
||||
'answer':
|
||||
answer
|
||||
})
|
||||
dataset_list = Dataset.from_list(dataset_list)
|
||||
return dataset_list
|
||||
|
Loading…
Reference in New Issue
Block a user