From 012546666b10339420d18472fb025413a1b82804 Mon Sep 17 00:00:00 2001
From: Kevin Wang
Date: Tue, 26 Sep 2023 14:31:15 +0800
Subject: [PATCH] [SIG] add WikiText-2&103 (#397)

* fix conflict

* add eval_cfg
---
 .../datasets/wikitext/wikitext_103_raw_ppl.py |  4 ++
 .../wikitext/wikitext_103_raw_ppl_752e2a.py   | 39 +++++++++++++++++++
 .../datasets/wikitext/wikitext_2_raw_ppl.py   |  4 ++
 .../wikitext/wikitext_2_raw_ppl_752e2a.py     | 39 +++++++++++++++++++
 4 files changed, 86 insertions(+)
 create mode 100644 configs/datasets/wikitext/wikitext_103_raw_ppl.py
 create mode 100644 configs/datasets/wikitext/wikitext_103_raw_ppl_752e2a.py
 create mode 100644 configs/datasets/wikitext/wikitext_2_raw_ppl.py
 create mode 100644 configs/datasets/wikitext/wikitext_2_raw_ppl_752e2a.py

diff --git a/configs/datasets/wikitext/wikitext_103_raw_ppl.py b/configs/datasets/wikitext/wikitext_103_raw_ppl.py
new file mode 100644
index 00000000..9a386cf0
--- /dev/null
+++ b/configs/datasets/wikitext/wikitext_103_raw_ppl.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+    from .wikitext_103_raw_ppl_752e2a import wikitext_103_raw_datasets  # noqa: F401, F403
diff --git a/configs/datasets/wikitext/wikitext_103_raw_ppl_752e2a.py b/configs/datasets/wikitext/wikitext_103_raw_ppl_752e2a.py
new file mode 100644
index 00000000..02dea1e9
--- /dev/null
+++ b/configs/datasets/wikitext/wikitext_103_raw_ppl_752e2a.py
@@ -0,0 +1,39 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import HFDataset
+
+
+wikitext_infer_cfg = dict(
+    prompt_template=dict(
+        type=PromptTemplate,
+        template={0: '{text}'}
+    ),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer),  # need a new ppl inferencer
+)
+
+wikitext_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )
+
+wikitext_103_raw_datasets = []
+for _split in ["validation", "test"]:
+
+    wikitext_reader_cfg = dict(
+        input_columns=["text"],
+        output_column=None,
+        train_split="train",
+        test_split=_split,
+    )
+
+    wikitext_103_raw_datasets.append(
+        dict(
+            abbr=f"wikitext-103-raw-{_split}",
+            type=HFDataset,
+            path="wikitext",
+            name="wikitext-103-raw-v1",
+            reader_cfg=wikitext_reader_cfg,
+            infer_cfg=wikitext_infer_cfg,
+            eval_cfg=wikitext_eval_cfg,
+        )
+    )
diff --git a/configs/datasets/wikitext/wikitext_2_raw_ppl.py b/configs/datasets/wikitext/wikitext_2_raw_ppl.py
new file mode 100644
index 00000000..18549e79
--- /dev/null
+++ b/configs/datasets/wikitext/wikitext_2_raw_ppl.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+    from .wikitext_2_raw_ppl_752e2a import wikitext_2_raw_datasets  # noqa: F401, F403
diff --git a/configs/datasets/wikitext/wikitext_2_raw_ppl_752e2a.py b/configs/datasets/wikitext/wikitext_2_raw_ppl_752e2a.py
new file mode 100644
index 00000000..41b3b872
--- /dev/null
+++ b/configs/datasets/wikitext/wikitext_2_raw_ppl_752e2a.py
@@ -0,0 +1,39 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import HFDataset
+
+
+wikitext_infer_cfg = dict(
+    prompt_template=dict(
+        type=PromptTemplate,
+        template={0: '{text}'}
+    ),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer),  # need a new ppl inferencer
+)
+
+wikitext_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )
+
+wikitext_2_raw_datasets = []
+for _split in ["validation", "test"]:
+
+    wikitext_reader_cfg = dict(
+        input_columns=["text"],
+        output_column=None,
+        train_split="train",
+        test_split=_split,
+    )
+
+    wikitext_2_raw_datasets.append(
+        dict(
+            abbr=f"wikitext-2-raw-{_split}",
+            type=HFDataset,
+            path="wikitext",
+            name="wikitext-2-raw-v1",
+            reader_cfg=wikitext_reader_cfg,
+            infer_cfg=wikitext_infer_cfg,
+            eval_cfg=wikitext_eval_cfg,
+        )
+    )