diff --git a/configs/datasets/ARC_e/ARC_e_gen_0a29bf.py b/configs/datasets/ARC_e/ARC_e_gen_0a29bf.py new file mode 100644 index 00000000..4c82b122 --- /dev/null +++ b/configs/datasets/ARC_e/ARC_e_gen_0a29bf.py @@ -0,0 +1,42 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import ARCDataset + +ARC_e_reader_cfg = dict( + input_columns=["question", "textA", "textB", "textC", "textD"], + output_column="answerKey") + +ARC_e_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template=dict( + round=[ + dict( + role="HUMAN", + prompt= + "Question: {question}\nA. {textA}\nB. {textB}\nC. {textC}\nD. {textD}\nAnswer:" + ) + ], ), + ), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer), +) + +ARC_e_eval_cfg = dict( + evaluator=dict(type=AccEvaluator), + pred_role="BOT", + pred_postprocessor=dict(type="first-capital"), +) + +ARC_e_datasets = [ + dict( + abbr="ARC-e", + type=ARCDataset, + path="./data/ARC/ARC-e/ARC-Easy-Dev.jsonl", + reader_cfg=ARC_e_reader_cfg, + infer_cfg=ARC_e_infer_cfg, + eval_cfg=ARC_e_eval_cfg, + ) +] diff --git a/configs/datasets/CLUE_ocnli/CLUE_ocnli_ppl.py b/configs/datasets/CLUE_ocnli/CLUE_ocnli_ppl.py new file mode 100644 index 00000000..4125080f --- /dev/null +++ b/configs/datasets/CLUE_ocnli/CLUE_ocnli_ppl.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .CLUE_ocnli_ppl_f103ab import ocnli_datasets # noqa: F401, F403 diff --git a/configs/datasets/CLUE_ocnli/CLUE_ocnli_ppl_1b4c9d.py b/configs/datasets/CLUE_ocnli/CLUE_ocnli_ppl_1b4c9d.py new file mode 100644 index 00000000..e0a3c3ee --- /dev/null +++ b/configs/datasets/CLUE_ocnli/CLUE_ocnli_ppl_1b4c9d.py @@ -0,0 +1,35 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import PPLInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import HFDataset + +ocnli_reader_cfg = dict( + input_columns=['sentence1', 'sentence2'], output_column='label') + +# TODO: two prompt templates for ocnli +ocnli_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template={ + 'contradiction': + '阅读文章:{sentence1}\n根据上文,回答如下问题: {sentence2}?\n答:错', + 'entailment': '阅读文章:{sentence1}\n根据上文,回答如下问题: {sentence2}?\n答:对', + 'neutral': '如果{sentence1}为真,那么{sentence2}也为真吗?可能' + }), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=PPLInferencer)) + +ocnli_eval_cfg = dict(evaluator=dict(type=AccEvaluator), ) + +ocnli_datasets = [ + dict( + type=HFDataset, + abbr='ocnli', + path='json', + split='train', + data_files='./data/CLUE/OCNLI/dev.json', + reader_cfg=ocnli_reader_cfg, + infer_cfg=ocnli_infer_cfg, + eval_cfg=ocnli_eval_cfg) +] diff --git a/configs/datasets/FewCLUE_chid/FewCLUE_chid_ppl_36eb14.py b/configs/datasets/FewCLUE_chid/FewCLUE_chid_ppl_36eb14.py new file mode 100644 index 00000000..48f12cc1 --- /dev/null +++ b/configs/datasets/FewCLUE_chid/FewCLUE_chid_ppl_36eb14.py @@ -0,0 +1,39 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import PPLInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets 
import CHIDDataset + +chid_reader_cfg = dict( + input_columns=[f'content{i}' for i in range(7)], output_column='answer') + +chid_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template={i: f"以下句子是否通顺?\n{{content{i}}}\n这个句子是通顺的。" + for i in range(7)}), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=PPLInferencer)) + +chid_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) + +chid_datasets = [ + dict( + type=CHIDDataset, + path='json', + abbr='chid-dev', + data_files='./data/FewCLUE/chid/dev_few_all.json', + split='train', + reader_cfg=chid_reader_cfg, + infer_cfg=chid_infer_cfg, + eval_cfg=chid_eval_cfg), + dict( + type=CHIDDataset, + path='json', + abbr='chid-test', + data_files='./data/FewCLUE/chid/test_public.json', + split='train', + reader_cfg=chid_reader_cfg, + infer_cfg=chid_infer_cfg, + eval_cfg=chid_eval_cfg), +] diff --git a/configs/datasets/FewCLUE_tnews/FewCLUE_tnews_ppl_d90d23.py b/configs/datasets/FewCLUE_tnews/FewCLUE_tnews_ppl_d90d23.py new file mode 100644 index 00000000..797d7cd9 --- /dev/null +++ b/configs/datasets/FewCLUE_tnews/FewCLUE_tnews_ppl_d90d23.py @@ -0,0 +1,43 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import PPLInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import TNewsDataset + +tnews_reader_cfg = dict(input_columns='sentence', output_column='label_desc2') + +tnews_labels = [ + '农业新闻', '旅游新闻', '游戏新闻', '科技类别公司新闻', '体育类别新闻', '初升高教育新闻', '娱乐圈新闻', '投资资讯', + '军事类别常识', '车辆新闻', '楼市新闻', '环球不含中国类别新闻', '书籍文化历史类别新闻', '故事类别新闻', '股票市场类别新闻' +] + +tnews_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template={lb: f'{{sentence}}这篇新闻属于:{lb}' + for lb in tnews_labels}), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=PPLInferencer)) + +tnews_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) + +tnews_datasets = [ + dict( + type=TNewsDataset, + path='json', + abbr='tnews-dev', + data_files='./data/FewCLUE/tnews/dev_few_all.json', + split='train', + reader_cfg=tnews_reader_cfg, + infer_cfg=tnews_infer_cfg, + eval_cfg=tnews_eval_cfg), + dict( + type=TNewsDataset, + path='json', + abbr='tnews-test', + data_files='./data/FewCLUE/tnews/test_public.json', + split='train', + reader_cfg=tnews_reader_cfg, + infer_cfg=tnews_infer_cfg, + eval_cfg=tnews_eval_cfg) +] diff --git a/configs/datasets/SuperGLUE_AX_b/SuperGLUE_AX_b_gen_477186.py b/configs/datasets/SuperGLUE_AX_b/SuperGLUE_AX_b_gen_477186.py new file mode 100644 index 00000000..6304dc4c --- /dev/null +++ b/configs/datasets/SuperGLUE_AX_b/SuperGLUE_AX_b_gen_477186.py @@ -0,0 +1,42 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import AXDataset_V2 + +AX_b_reader_cfg = dict( + input_columns=["sentence1", "sentence2"], + output_column="label", +) + +AX_b_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template=dict(round=[ + dict( + role="HUMAN", + prompt= + "{sentence1}\n{sentence2}\nIs the sentence below entailed by the sentence above?\nA. Yes\nB. 
No\nAnswer:" + ), + ]), + ), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer), +) + +AX_b_eval_cfg = dict( + evaluator=dict(type=AccEvaluator), + pred_role="BOT", + pred_postprocessor=dict(type="first-capital"), +) + +AX_b_datasets = [ + dict( + abbr="AX_b", + type=AXDataset_V2, + path="./data/SuperGLUE/AX-b/AX-b.jsonl", + reader_cfg=AX_b_reader_cfg, + infer_cfg=AX_b_infer_cfg, + eval_cfg=AX_b_eval_cfg, + ) +] diff --git a/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_gen_8525d1.py b/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_gen_8525d1.py new file mode 100644 index 00000000..43f65067 --- /dev/null +++ b/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_gen_8525d1.py @@ -0,0 +1,40 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import BoolQDataset_V2 + +BoolQ_reader_cfg = dict( + input_columns=["question", "passage"], + output_column="label", +) + +BoolQ_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template=dict(round=[ + dict( + role="HUMAN", + prompt="{passage}\nQuestion: {question}\nA. Yes\nB. No\nAnswer:"), + ]), + ), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer), +) + +BoolQ_eval_cfg = dict( + evaluator=dict(type=AccEvaluator), + pred_role="BOT", + pred_postprocessor=dict(type="first-capital"), +) + +BoolQ_datasets = [ + dict( + abbr="BoolQ", + type=BoolQDataset_V2, + path="./data/SuperGLUE/BoolQ/val.jsonl", + reader_cfg=BoolQ_reader_cfg, + infer_cfg=BoolQ_infer_cfg, + eval_cfg=BoolQ_eval_cfg, + ) +] diff --git a/configs/datasets/SuperGLUE_CB/SuperGLUE_CB_gen_bb97e1.py b/configs/datasets/SuperGLUE_CB/SuperGLUE_CB_gen_bb97e1.py new file mode 100644 index 00000000..89762511 --- /dev/null +++ b/configs/datasets/SuperGLUE_CB/SuperGLUE_CB_gen_bb97e1.py @@ -0,0 +1,43 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import CBDataset_V2 + +CB_reader_cfg = dict( + input_columns=["premise", "hypothesis"], + output_column="label", +) + +CB_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template=dict( + round=[ + dict( + role="HUMAN", + prompt= + "{premise}\n{hypothesis}\nWhat is the relation between the two sentences?\nA. Contradiction\nB. Entailment\nC. 
Neutral\nAnswer:" + ) + ], ), + ), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer), +) + +CB_eval_cfg = dict( + evaluator=dict(type=AccEvaluator), + pred_role="BOT", + pred_postprocessor=dict(type="first-capital"), +) + +CB_datasets = [ + dict( + abbr="CB", + type=CBDataset_V2, + path="./data/SuperGLUE/CB/val.jsonl", + reader_cfg=CB_reader_cfg, + infer_cfg=CB_infer_cfg, + eval_cfg=CB_eval_cfg, + ) +] diff --git a/configs/datasets/SuperGLUE_WiC/SuperGLUE_WiC_gen.py b/configs/datasets/SuperGLUE_WiC/SuperGLUE_WiC_gen.py new file mode 100644 index 00000000..4aba7ef6 --- /dev/null +++ b/configs/datasets/SuperGLUE_WiC/SuperGLUE_WiC_gen.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .SuperGLUE_WiC_gen_c39367 import WiC_datasets  # noqa: F401, F403 diff --git a/configs/datasets/XCOPA/XCOPA_ppl_6215c4.py b/configs/datasets/XCOPA/XCOPA_ppl_6215c4.py new file mode 100644 index 00000000..1e4da6db --- /dev/null +++ b/configs/datasets/XCOPA/XCOPA_ppl_6215c4.py @@ -0,0 +1,31 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import PPLInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import XCOPADataset + +XCOPA_reader_cfg = dict( + input_columns=['question', 'premise', 'choice1', 'choice2'], + output_column='label', + test_split='train') + +# Both candidate strings share an identical prefix so that the PPL +# comparison differs only in the answer span. +XCOPA_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template={ + 0: "Premise:{premise}。\nQuestion:{question}。\nAnswer: {choice1}.", + 1: "Premise:{premise}。\nQuestion:{question}。\nAnswer: {choice2}.", + }), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=PPLInferencer)) + +XCOPA_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) + +XCOPA_datasets = [ + dict( + type=XCOPADataset, + path='xcopa', + reader_cfg=XCOPA_reader_cfg, + infer_cfg=XCOPA_infer_cfg, + eval_cfg=XCOPA_eval_cfg) +] diff --git a/configs/datasets/collections/chat_small.py b/configs/datasets/collections/chat_small.py new file mode 100644 index 00000000..61e295b9 --- /dev/null +++ b/configs/datasets/collections/chat_small.py @@ -0,0 +1,40 @@ +from mmengine.config import read_base + +with read_base(): + from ..mmlu.mmlu_gen_a568f1 import mmlu_datasets + from ..ceval.ceval_gen_ee2cb0 import ceval_datasets + from ..bbh.bbh_gen_58abc3 import bbh_datasets + from ..CLUE_CMRC.CLUE_CMRC_gen_72a8d5 import CMRC_datasets + from ..CLUE_DRCD.CLUE_DRCD_gen_03b96b import DRCD_datasets + from ..CLUE_afqmc.CLUE_afqmc_gen_db509b import afqmc_datasets + from ..FewCLUE_bustm.FewCLUE_bustm_gen_305431 import bustm_datasets + from ..FewCLUE_chid.FewCLUE_chid_gen_686c63 import chid_datasets + from ..FewCLUE_cluewsc.FewCLUE_cluewsc_gen_276956 import cluewsc_datasets + from ..FewCLUE_eprstmt.FewCLUE_eprstmt_gen_d6d06d import eprstmt_datasets + from ..humaneval.humaneval_gen_d428f1 import humaneval_datasets + from ..mbpp.mbpp_gen_4104e4 import mbpp_datasets + from ..lambada.lambada_gen_7ffe3d import lambada_datasets + from ..storycloze.storycloze_gen_c5a230 import storycloze_datasets + from ..SuperGLUE_AX_b.SuperGLUE_AX_b_gen_477186 import AX_b_datasets + from ..SuperGLUE_AX_g.SuperGLUE_AX_g_gen_7a5dee import AX_g_datasets + from ..SuperGLUE_BoolQ.SuperGLUE_BoolQ_gen_8525d1 import BoolQ_datasets + from ..SuperGLUE_CB.SuperGLUE_CB_gen_bb97e1 import CB_datasets + from ..SuperGLUE_COPA.SuperGLUE_COPA_gen_6d5e67 import COPA_datasets + from
..SuperGLUE_MultiRC.SuperGLUE_MultiRC_gen_26c9dc import MultiRC_datasets + from ..SuperGLUE_RTE.SuperGLUE_RTE_gen_ce346a import RTE_datasets + from ..SuperGLUE_ReCoRD.SuperGLUE_ReCoRD_gen_d8f19c import ReCoRD_datasets + from ..SuperGLUE_WiC.SuperGLUE_WiC_gen_c39367 import WiC_datasets + from ..SuperGLUE_WSC.SuperGLUE_WSC_gen_d8d441 import WSC_datasets + from ..race.race_gen_12de48 import race_datasets + from ..math.math_gen_78bcba import math_datasets + from ..gsm8k.gsm8k_gen_2dd372 import gsm8k_datasets + from ..summedits.summedits_gen_4f35b5 import summedits_datasets + from ..hellaswag.hellaswag_gen_cae9cb import hellaswag_datasets + from ..piqa.piqa_gen_8287ae import piqa_datasets + from ..winogrande.winogrande_gen_c19d87 import winogrande_datasets + from ..obqa.obqa_gen_b2cde9 import obqa_datasets + from ..nq.nq_gen_a6ffca import nq_datasets + from ..triviaqa.triviaqa_gen_cc3cbf import triviaqa_datasets + from ..crowspairs.crowspairs_gen_dd110a import crowspairs_datasets + +datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), []) diff --git a/configs/datasets/commonsenseqa/commonsenseqa_ppl.py b/configs/datasets/commonsenseqa/commonsenseqa_ppl.py new file mode 100644 index 00000000..f2d038d8 --- /dev/null +++ b/configs/datasets/commonsenseqa/commonsenseqa_ppl.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .commonsenseqa_ppl_2ca33c import commonsenseqa_datasets # noqa: F401, F403 diff --git a/configs/datasets/hellaswag/hellaswag_ppl.py b/configs/datasets/hellaswag/hellaswag_ppl.py new file mode 100644 index 00000000..8225ace7 --- /dev/null +++ b/configs/datasets/hellaswag/hellaswag_ppl.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .hellaswag_ppl_8e07d6 import hellaswag_datasets # noqa: F401, F403 diff --git a/configs/datasets/lambada/lambada_gen_7ffe3d.py b/configs/datasets/lambada/lambada_gen_7ffe3d.py new file mode 100644 index 00000000..d83c95cc --- /dev/null +++ b/configs/datasets/lambada/lambada_gen_7ffe3d.py @@ -0,0 +1,33 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.datasets import lambadaDataset, LambadaEvaluator + +lambada_reader_cfg = dict( + input_columns=['prompt'], + output_column='label', + train_split='test', + test_split='test') + +lambada_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template=dict(round=[ + dict( + role='HUMAN', + prompt='Please complete the following sentence:\n{prompt}') + ])), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer, max_out_len=5)) + +lambada_eval_cfg = dict(evaluator=dict(type=LambadaEvaluator)) + +lambada_datasets = [ + dict( + abbr='lambada', + type=lambadaDataset, + path='craffel/openai_lambada', + reader_cfg=lambada_reader_cfg, + infer_cfg=lambada_infer_cfg, + eval_cfg=lambada_eval_cfg) +] diff --git a/configs/datasets/mbpp/mbpp_gen_94fe59.py b/configs/datasets/mbpp/mbpp_gen_94fe59.py new file mode 100644 index 00000000..c4948f34 --- /dev/null +++ b/configs/datasets/mbpp/mbpp_gen_94fe59.py @@ -0,0 +1,27 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.datasets import MBPPDataset, MBPPEvaluator + +mbpp_reader_cfg = dict( + input_columns=['text', 'test_list'], 
output_column='code') + +mbpp_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template= + "You are an expert Python programmer, and here is your task: Write a function to find the similar elements from the given two tuple lists. Your code should pass these tests:\n\n assert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5)\n assert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4) \n assert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14) \n[BEGIN]\n 'def similar_elements(test_tup1, test_tup2):\r\n res = tuple(set(test_tup1) & set(test_tup2))\r\n return (res)' \n[DONE] \n\n You are an expert Python programmer, and here is your task: Write a python function to identify non-prime numbers. Your code should pass these tests:\n\n assert is_not_prime(2) == False \n assert is_not_prime(10) == True \n assert is_not_prime(35) == True \n[BEGIN]\n 'import math\r\ndef is_not_prime(n):\r\n result = False\r\n for i in range(2,int(math.sqrt(n)) + 1):\r\n if n % i == 0:\r\n result = True\r\n return result' \n[DONE] \n\n You are an expert Python programmer, and here is your task: Write a function to find the largest integers from a given list of numbers using heap queue algorithm. Your code should pass these tests:\n\n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35] \n[BEGIN]\n 'import heapq as hq\r\ndef heap_queue_largest(nums,n):\r\n largest_nums = hq.nlargest(n, nums)\r\n return largest_nums' \n[DONE] \n\n You are an expert Python programmer, and here is your task: {text} Your code should pass these tests:\n\n {test_list} \n[BEGIN]\n"), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer, max_out_len=512)) + +mbpp_eval_cfg = dict(evaluator=dict(type=MBPPEvaluator)) + +mbpp_datasets = [ + dict( + type=MBPPDataset, + abbr='mbpp', + path='./data/mbpp/mbpp.jsonl', + reader_cfg=mbpp_reader_cfg, + infer_cfg=mbpp_infer_cfg, + eval_cfg=mbpp_eval_cfg) +] diff --git a/configs/datasets/piqa/piqa_gen_8287ae.py b/configs/datasets/piqa/piqa_gen_8287ae.py new file mode 100644 index 00000000..09ce5289 --- /dev/null +++ b/configs/datasets/piqa/piqa_gen_8287ae.py @@ -0,0 +1,38 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import piqaDataset_V2 + +piqa_reader_cfg = dict( + input_columns=["goal", "sol1", "sol2"], + output_column="answer", + test_split="validation") + +piqa_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template=dict( + round=[ + dict(role="HUMAN", prompt="{goal}\nA. {sol1}\nB. 
{sol2}\nAnswer:") + ], ), + ), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer), +) + +piqa_eval_cfg = dict( + evaluator=dict(type=AccEvaluator), + pred_role="BOT", + pred_postprocessor=dict(type="first-capital"), +) + +piqa_datasets = [ + dict( + abbr="piqa", + type=piqaDataset_V2, + path="piqa", + reader_cfg=piqa_reader_cfg, + infer_cfg=piqa_infer_cfg, + eval_cfg=piqa_eval_cfg) +] diff --git a/configs/datasets/piqa/piqa_ppl_7c4bd8.py b/configs/datasets/piqa/piqa_ppl_7c4bd8.py new file mode 100644 index 00000000..cb194b17 --- /dev/null +++ b/configs/datasets/piqa/piqa_ppl_7c4bd8.py @@ -0,0 +1,41 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import PPLInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import HFDataset + +piqa_reader_cfg = dict( + input_columns=['goal', 'sol1', 'sol2'], + output_column='label', + test_split='validation') + +piqa_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template={ + 0: + dict( + round=[ + dict(role="HUMAN", prompt="{goal}"), + dict(role="BOT", prompt="{sol1}") + ], ), + 1: + dict( + round=[ + dict(role="HUMAN", prompt="{goal}"), + dict(role="BOT", prompt="{sol2}") + ], ) + }), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=PPLInferencer)) + +piqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) + +piqa_datasets = [ + dict( + type=HFDataset, + path='piqa', + reader_cfg=piqa_reader_cfg, + infer_cfg=piqa_infer_cfg, + eval_cfg=piqa_eval_cfg) +] diff --git a/configs/datasets/qasper/qasper_gen.py b/configs/datasets/qasper/qasper_gen.py new file mode 100644 index 00000000..616f4dd7 --- /dev/null +++ b/configs/datasets/qasper/qasper_gen.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .qasper_gen_1230f2 import qasper_datasets  # noqa: F401, F403 diff --git a/configs/datasets/qaspercut/qaspercut_gen_d783f5.py b/configs/datasets/qaspercut/qaspercut_gen_d783f5.py new file mode 100644 index 00000000..f06f7b1e --- /dev/null +++ b/configs/datasets/qaspercut/qaspercut_gen_d783f5.py @@ -0,0 +1,37 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.datasets import QASPERCUTDataset, TriviaQAEvaluator + +qaspercut_reader_cfg = dict( + input_columns=['question', 'evidence'], + output_column='answer', + train_split='dev', + test_split='dev') + +qaspercut_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template=dict( + round=[ + dict( + role='HUMAN', + prompt='{evidence}\nAnswer these questions:\nQ: {question}?'), + dict(role='BOT', prompt='A:'), + ], )), + retriever=dict(type=ZeroRetriever), + inferencer=dict( + type=GenInferencer, max_out_len=50, max_seq_len=8192, batch_size=4)) + +qaspercut_eval_cfg = dict( + evaluator=dict(type=TriviaQAEvaluator), pred_role='BOT') + +qaspercut_datasets = [ + dict( + type=QASPERCUTDataset, + abbr='qaspercut', + path='./data/QASPER/', + reader_cfg=qaspercut_reader_cfg, + infer_cfg=qaspercut_infer_cfg, + eval_cfg=qaspercut_eval_cfg) +] diff --git a/configs/datasets/race/race_gen.py b/configs/datasets/race/race_gen.py new file mode 100644 index 00000000..b2ff1b42 --- /dev/null +++ b/configs/datasets/race/race_gen.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from
.race_gen_12de48 import race_datasets # noqa: F401, F403 diff --git a/configs/datasets/siqa/siqa_gen.py b/configs/datasets/siqa/siqa_gen.py new file mode 100644 index 00000000..e5c731f9 --- /dev/null +++ b/configs/datasets/siqa/siqa_gen.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .siqa_gen_a3c714 import siqa_datasets # noqa: F401, F403 diff --git a/configs/datasets/storycloze/storycloze_gen.py b/configs/datasets/storycloze/storycloze_gen.py new file mode 100644 index 00000000..df02ccf1 --- /dev/null +++ b/configs/datasets/storycloze/storycloze_gen.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .storycloze_gen_c5a230 import storycloze_datasets # noqa: F401, F403 diff --git a/configs/datasets/summedits/summedits_ppl.py b/configs/datasets/summedits/summedits_ppl.py new file mode 100644 index 00000000..0c6ac839 --- /dev/null +++ b/configs/datasets/summedits/summedits_ppl.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .summedits_ppl_163352 import summedits_datasets # noqa: F401, F403 diff --git a/configs/datasets/triviaqa/triviaqa_gen_4f9838.py b/configs/datasets/triviaqa/triviaqa_gen_4f9838.py new file mode 100644 index 00000000..c8cf362e --- /dev/null +++ b/configs/datasets/triviaqa/triviaqa_gen_4f9838.py @@ -0,0 +1,34 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.datasets import TriviaQADataset, TriviaQAEvaluator + +triviaqa_reader_cfg = dict( + input_columns=['question'], + output_column='answer', + train_split='dev', + test_split='dev') + +triviaqa_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template=dict( + round=[ + dict(role='HUMAN', prompt='Answer these questions:\nQ: {question}?'), + dict(role='BOT', prompt='A:'), + ], )), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer, max_out_len=50)) + +triviaqa_eval_cfg = dict( + evaluator=dict(type=TriviaQAEvaluator), pred_role='BOT') + +triviaqa_datasets = [ + dict( + type=TriviaQADataset, + abbr='triviaqa', + path='./data/triviaqa/', + reader_cfg=triviaqa_reader_cfg, + infer_cfg=triviaqa_infer_cfg, + eval_cfg=triviaqa_eval_cfg) +] diff --git a/configs/datasets/triviaqa/triviaqa_gen_5ef393.py b/configs/datasets/triviaqa/triviaqa_gen_5ef393.py new file mode 100644 index 00000000..3265a142 --- /dev/null +++ b/configs/datasets/triviaqa/triviaqa_gen_5ef393.py @@ -0,0 +1,30 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.datasets import TriviaQADataset, TriviaQAEvaluator + +triviaqa_reader_cfg = dict( + input_columns=['question'], + output_column='answer', + train_split='dev', + test_split='dev') + +triviaqa_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template='Answer these questions:\nQ: {question}\nA:{answer}'), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer, max_out_len=50)) + +triviaqa_eval_cfg = dict( + evaluator=dict(type=TriviaQAEvaluator), pred_role='BOT') + +triviaqa_datasets = [ + dict( + type=TriviaQADataset, + abbr='triviaqa', + path='./data/triviaqa/', + reader_cfg=triviaqa_reader_cfg, + infer_cfg=triviaqa_infer_cfg, + eval_cfg=triviaqa_eval_cfg) +] diff --git 
a/configs/datasets/winograd/winograd_ppl_c21c87.py b/configs/datasets/winograd/winograd_ppl_c21c87.py new file mode 100644 index 00000000..c6330dee --- /dev/null +++ b/configs/datasets/winograd/winograd_ppl_c21c87.py @@ -0,0 +1,36 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import PPLInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import winogradDataset + +winograd_reader_cfg = dict( + input_columns=['prompt', 'pronoun', 'opt1', 'opt2'], + output_column='label', + train_split='test', + test_split='test') + +winograd_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template={ + 0: + "{prompt} Q: In the previous text, what does '{pronoun}' refer to? A: {opt1}.",  # noqa + 1: + "{prompt} Q: In the previous text, what does '{pronoun}' refer to? A: {opt2}.",  # noqa + }), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=PPLInferencer)) + +winograd_eval_cfg = dict(evaluator=dict(type=AccEvaluator), ) + +winograd_datasets = [ + dict( + abbr='winograd', + type=winogradDataset, + path='winograd_wsc', + name='wsc285', + reader_cfg=winograd_reader_cfg, + infer_cfg=winograd_infer_cfg, + eval_cfg=winograd_eval_cfg) +] diff --git a/configs/datasets/winogrande/winogrande_ppl_515f92.py b/configs/datasets/winogrande/winogrande_ppl_515f92.py new file mode 100644 index 00000000..e58cb485 --- /dev/null +++ b/configs/datasets/winogrande/winogrande_ppl_515f92.py @@ -0,0 +1,34 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import PPLInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import winograndeDataset + +winogrande_reader_cfg = dict( + input_columns=['opt1', 'opt2'], + output_column='answer', + train_split='validation', + test_split='validation') + +winogrande_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template={ + 1: "Good sentence: {opt1}", + 2: "Good sentence: {opt2}", + }), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=PPLInferencer)) + +winogrande_eval_cfg = dict(evaluator=dict(type=AccEvaluator), ) + +winogrande_datasets = [ + dict( + abbr='winogrande', + type=winograndeDataset, + path='winogrande', + name='winogrande_xs', + reader_cfg=winogrande_reader_cfg, + infer_cfg=winogrande_infer_cfg, + eval_cfg=winogrande_eval_cfg) +] diff --git a/configs/summarizers/groups/ceval.py b/configs/summarizers/groups/ceval.py new file mode 100644 index 00000000..c3994b2b --- /dev/null +++ b/configs/summarizers/groups/ceval.py @@ -0,0 +1,24 @@ +ceval_summary_groups = [] + +_ceval_stem = ['computer_network', 'operating_system', 'computer_architecture', 'college_programming', 'college_physics', 'college_chemistry', 'advanced_mathematics', 'probability_and_statistics', 'discrete_mathematics', 'electrical_engineer', 'metrology_engineer', 'high_school_mathematics', 'high_school_physics', 'high_school_chemistry', 'high_school_biology', 'middle_school_mathematics', 'middle_school_biology', 'middle_school_physics', 'middle_school_chemistry', 'veterinary_medicine'] +_ceval_stem = ['ceval-' + s for s in _ceval_stem] +ceval_summary_groups.append({'name': 'ceval-stem', 'subsets': _ceval_stem}) + +_ceval_social_science = ['college_economics', 'business_administration', 'marxism', 'mao_zedong_thought', 'education_science',
'teacher_qualification', 'high_school_politics', 'high_school_geography', 'middle_school_politics', 'middle_school_geography'] +_ceval_social_science = ['ceval-' + s for s in _ceval_social_science] +ceval_summary_groups.append({'name': 'ceval-social-science', 'subsets': _ceval_social_science}) + +_ceval_humanities = ['modern_chinese_history', 'ideological_and_moral_cultivation', 'logic', 'law', 'chinese_language_and_literature', 'art_studies', 'professional_tour_guide', 'legal_professional', 'high_school_chinese', 'high_school_history', 'middle_school_history'] +_ceval_humanities = ['ceval-' + s for s in _ceval_humanities] +ceval_summary_groups.append({'name': 'ceval-humanities', 'subsets': _ceval_humanities}) + +_ceval_other = ['civil_servant', 'sports_science', 'plant_protection', 'basic_medicine', 'clinical_medicine', 'urban_and_rural_planner', 'accountant', 'fire_engineer', 'environmental_impact_assessment_engineer', 'tax_accountant', 'physician'] +_ceval_other = ['ceval-' + s for s in _ceval_other] +ceval_summary_groups.append({'name': 'ceval-other', 'subsets': _ceval_other}) + +_ceval_hard = ['advanced_mathematics', 'discrete_mathematics', 'probability_and_statistics', 'college_chemistry', 'college_physics', 'high_school_mathematics', 'high_school_chemistry', 'high_school_physics'] +_ceval_hard = ['ceval-' + s for s in _ceval_hard] +ceval_summary_groups.append({'name': 'ceval-hard', 'subsets': _ceval_hard}) + +_ceval_all = _ceval_stem + _ceval_social_science + _ceval_humanities + _ceval_other +ceval_summary_groups.append({'name': 'ceval', 'subsets': _ceval_all}) diff --git a/opencompass/datasets/ceval.py b/opencompass/datasets/ceval.py new file mode 100644 index 00000000..ac1a8015 --- /dev/null +++ b/opencompass/datasets/ceval.py @@ -0,0 +1,37 @@ +import os.path as osp + +from datasets import DatasetDict, load_dataset + +from opencompass.registry import LOAD_DATASET + +from .base import BaseDataset + + +@LOAD_DATASET.register_module() +class CEvalDataset(BaseDataset): + + @staticmethod + def load(path: str, name: str): + dev_dataset = load_dataset( + 'csv', + data_files=osp.join(path, 'dev', f'{name}_dev.csv'), + split='train') + val_dataset = load_dataset( + 'csv', + data_files=osp.join(path, 'val', f'{name}_val.csv'), + split='train') + val_dataset = val_dataset.add_column('explanation', + [''] * len(val_dataset)) + test_dataset = load_dataset( + 'csv', + data_files=osp.join(path, 'test', f'{name}_test.csv'), + split='train') + test_dataset = test_dataset.add_column( + 'answer', + [''] * len(test_dataset)).add_column('explanation', + [''] * len(test_dataset)) + return DatasetDict({ + 'val': val_dataset, + 'dev': dev_dataset, + 'test': test_dataset + }) diff --git a/opencompass/datasets/csl.py b/opencompass/datasets/csl.py new file mode 100644 index 00000000..e9379f4f --- /dev/null +++ b/opencompass/datasets/csl.py @@ -0,0 +1,43 @@ +import json + +from datasets import Dataset, load_dataset + +from opencompass.registry import LOAD_DATASET + +from .base import BaseDataset + + +@LOAD_DATASET.register_module() +class CslDataset(BaseDataset): + + @staticmethod + def load(**kwargs): + + dataset = load_dataset(**kwargs) + + def preprocess(example): + keywords = ','.join(example['keyword']) + example['keywords'] = keywords + + return example + + dataset = dataset.map(preprocess) + return dataset + + +@LOAD_DATASET.register_module() +class CslDataset_V2(BaseDataset): + + @staticmethod + def load(path): + data = [] + with open(path, 'r') as f: + for line in f: + line = json.loads(line) + item 
= { + 'abst': line['abst'], + 'keywords': ','.join(line['keyword']), + 'label': 'AB'[int(line['label'])], + } + data.append(item) + return Dataset.from_list(data) diff --git a/opencompass/datasets/drop.py b/opencompass/datasets/drop.py new file mode 100644 index 00000000..cd3da6d2 --- /dev/null +++ b/opencompass/datasets/drop.py @@ -0,0 +1,29 @@ +from datasets import DatasetDict, load_dataset + +from opencompass.registry import LOAD_DATASET + +from .base import BaseDataset + + +@LOAD_DATASET.register_module() +class dropDataset(BaseDataset): + + @staticmethod + def load(**kwargs): + dataset = load_dataset(**kwargs, split='validation') + + def pre_process(example): + example['answers'] = example['answers_spans']['spans'] + example['prompt'] = example.pop('passage') + return example + + def only_number(example): + for i in example['answers_spans']['types']: + if i == 'number': + return True + return False + + dataset = dataset.filter(only_number) + dataset = dataset.map(pre_process).remove_columns( + ['section_id', 'query_id']) + return DatasetDict({'validation': dataset}) diff --git a/opencompass/datasets/gsm8k.py b/opencompass/datasets/gsm8k.py new file mode 100644 index 00000000..089a5a7c --- /dev/null +++ b/opencompass/datasets/gsm8k.py @@ -0,0 +1,18 @@ +from opencompass.registry import TEXT_POSTPROCESSORS + + +@TEXT_POSTPROCESSORS.register_module('gsm8k_dataset') +def gsm8k_dataset_postprocess(text: str) -> str: +    # Ground-truth answers end with '#### <number>'; drop thousands separators. +    return text.split('#### ')[1].replace(',', '') + + +@TEXT_POSTPROCESSORS.register_module('gsm8k') +def gsm8k_postprocess(text: str) -> str: +    # Keep only the first paragraph of the generation, then take the last +    # whitespace-separated token containing a digit and keep only its digits. +    text = text.split('\n\n')[0] +    for token in reversed(text.split(' ')): +        if any(ch.isdigit() for ch in token): +            return ''.join(ch for ch in token if ch.isdigit()) +    return '' diff --git a/opencompass/datasets/lcsts.py b/opencompass/datasets/lcsts.py new file mode 100644 index 00000000..92648f97 --- /dev/null +++ b/opencompass/datasets/lcsts.py @@ -0,0 +1,40 @@ +import os.path as osp + +from datasets import Dataset + +from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS + +from .base import BaseDataset + + +@LOAD_DATASET.register_module() +class LCSTSDataset(BaseDataset): + +    @staticmethod +    def load(path: str): +        src_path = osp.join(path, 'test.src.txt') +        tgt_path = osp.join(path, 'test.tgt.txt') + +        src_lines = open(src_path, 'r', encoding='utf-8').readlines() +        tgt_lines = open(tgt_path, 'r', encoding='utf-8').readlines() + +        data = {'content': [], 'abst': []} + +        for src_text, tgt_text in zip(src_lines, tgt_lines): +            data['content'].append(src_text.strip()) +            data['abst'].append(tgt_text.strip()) + +        dataset = Dataset.from_dict({ +            'content': data['content'], +            'abst': data['abst'] +        }) +        return dataset + + +@TEXT_POSTPROCESSORS.register_module('lcsts') +def lcsts_postprocess(text: str) -> str: +    text = text.strip().split('\n')[0].replace('своей', '').strip() +    text = text.replace('1. ', '') if text.startswith('1. ') else text +    text = text.replace('- ', '') if text.startswith('- ') else text +    text = text.strip('“,。!”') +    return text diff --git a/opencompass/datasets/obqa.py b/opencompass/datasets/obqa.py new file mode 100644 index 00000000..9748b113 --- /dev/null +++ b/opencompass/datasets/obqa.py @@ -0,0 +1,21 @@ +from datasets import load_dataset + +from opencompass.registry import LOAD_DATASET + +from .base import BaseDataset + + +@LOAD_DATASET.register_module() +class OBQADataset(BaseDataset): + +    @staticmethod +    def load(**kwargs): +        dataset = load_dataset(**kwargs) + +        def pre_process(example): +            for i in range(4): +                example[chr(ord('A') + i)] = example['choices']['text'][i] +            return example + +        dataset = dataset.map(pre_process).remove_columns(['id', 'choices']) +        return dataset diff --git a/opencompass/datasets/summscreen.py b/opencompass/datasets/summscreen.py new file mode 100644 index 00000000..fb3707e2 --- /dev/null +++ b/opencompass/datasets/summscreen.py @@ -0,0 +1,31 @@ +from datasets import Dataset, DatasetDict + +from opencompass.registry import LOAD_DATASET + +from .base import BaseDataset + + +@LOAD_DATASET.register_module() +class SummScreenDataset(BaseDataset): + +    @staticmethod +    def load(path: str): +        import json +        import os +        dataset_dict = DatasetDict() +        split = 'dev' +        dev_list = [] +        # The raw release has two sources (ForeverDreaming and TVMegaSite); +        # both are parsed identically and merged into a single split. +        for source in ('fd', 'tms'): +            folder = os.path.join(path, 'SummScreen_raw', source) +            for file in os.listdir(folder): +                filename = os.path.join(folder, file) +                with open(filename, 'r') as f: +                    data = json.load(f) +                dev_list.append({ +                    'content': '\n'.join(data['Transcript']), +                    'summary': ''.join(data['Recap']), +                }) +        dataset_dict[split] = Dataset.from_list(dev_list) +        return dataset_dict diff --git a/opencompass/datasets/wic.py b/opencompass/datasets/wic.py new file mode 100644 index 00000000..7b36161b --- /dev/null +++ b/opencompass/datasets/wic.py @@ -0,0 +1,41 @@ +import json + +from datasets import Dataset, load_dataset + +from opencompass.registry import LOAD_DATASET + +from .base import BaseDataset + + +@LOAD_DATASET.register_module() +class WiCDataset(BaseDataset): + +    @staticmethod +    def load(**kwargs): + +        dataset = load_dataset(**kwargs) + +        def preprocess(example): +            if example['label'] == 'true': +                example['answer'] = 1 +            else: +                example['answer'] = 0 + +            return example + +        dataset = dataset.map(preprocess) +        return dataset + + +@LOAD_DATASET.register_module() +class WiCDataset_V2(BaseDataset): + +    @staticmethod +    def load(path): +        dataset = [] +        with open(path, 'r') as f: +            for line in f: +                line = json.loads(line) +                line['label'] = {'true': 'A', 'false': 'B'}[line['label']] +                dataset.append(line) +        return Dataset.from_list(dataset) diff --git a/opencompass/openicl/__init__.py b/opencompass/openicl/__init__.py new file mode 100644 index 00000000..107406c3 --- /dev/null +++ b/opencompass/openicl/__init__.py @@ -0,0 +1,5 @@ +from .icl_dataset_reader import DatasetReader +from .icl_evaluator import * +from .icl_prompt_template import PromptTemplate +from .icl_retriever import * +from .icl_inferencer import *
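
The collections added here (e.g. `chat_small.py`) are meant to be pulled into a top-level run config via `read_base`; the `sum(...)` idiom at the bottom of the collection flattens every `*_datasets` list into a single `datasets` list. A hypothetical entry config along these lines would consume them; the model fields below are placeholders, not part of this diff:

```python
from mmengine.config import read_base

with read_base():
    # Bring in the flattened `datasets` list built by the collection.
    from .datasets.collections.chat_small import datasets  # noqa: F401

models = [
    dict(
        type='HuggingFaceCausalLM',  # assumed model wrapper, not added here
        abbr='example-7b',           # hypothetical abbreviation
        path='org/example-7b',       # hypothetical HF checkpoint
        max_out_len=100,
        batch_size=8,
        run_cfg=dict(num_gpus=1),
    ),
]
```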
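For the `*_ppl_*` configs (winograd, winogrande, XCOPA, piqa_ppl, ocnli, chid, tnews), the template is a dict mapping each candidate label to a fully written-out answer string. Conceptually, `PPLInferencer` fills every candidate, scores each completed string under the model, and predicts the lowest-perplexity key. A minimal sketch of that selection rule, with `score_ppl` standing in for the actual model call and string-only templates assumed:

```python
def choose_by_ppl(example: dict, templates: dict, score_ppl) -> object:
    # Fill each candidate template with the row's columns, score its
    # perplexity, and return the label whose string the model finds likeliest.
    scores = {label: score_ppl(template.format(**example))
              for label, template in templates.items()}
    return min(scores, key=scores.get)
```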
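Several of the gen configs above pair `pred_role="BOT"` with `pred_postprocessor=dict(type="first-capital")` to map a free-form reply onto an option label before `AccEvaluator` compares it with the gold letter. Its contract is essentially the following (a minimal sketch, not the registered implementation):

```python
def first_capital_postprocess(text: str) -> str:
    # Return the first uppercase letter of the model's reply, or '' if none;
    # this becomes the predicted option label (e.g. 'A' or 'B').
    for ch in text:
        if ch.isupper():
            return ch
    return ''
```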
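`CEvalDataset` pads the val split with an empty `explanation` column and the test split with empty `answer`/`explanation` columns so that all three splits share one schema. A usage sketch (the local path and subject name are illustrative):

```python
from opencompass.datasets.ceval import CEvalDataset

ceval = CEvalDataset.load(path='./data/ceval', name='computer_network')
print(ceval['dev'][0]['explanation'])   # real few-shot explanations
print(ceval['val'][0]['answer'])        # real label; explanation padded to ''
print(ceval['test'][0]['answer'])       # '' — hidden test labels padded empty
```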
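The two GSM8K postprocessors are easy to sanity-check against their intended behavior:

```python
from opencompass.datasets.gsm8k import (gsm8k_dataset_postprocess,
                                        gsm8k_postprocess)

# '#### ' marks the gold answer; ',' thousands separators are stripped.
assert gsm8k_dataset_postprocess('Half of 144 is 72.\n#### 72') == '72'
# From a generation, the last digit-bearing token of the first paragraph wins.
assert gsm8k_postprocess('So she makes $18 every day.\n\nNext question:') == '18'
```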