diff --git a/configs/datasets/ARC_c/ARC_c_cot_gen_926652.py b/configs/datasets/ARC_c/ARC_c_cot_gen_926652.py new file mode 100644 index 00000000..5bbb25c9 --- /dev/null +++ b/configs/datasets/ARC_c/ARC_c_cot_gen_926652.py @@ -0,0 +1,53 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import ARCDataset +from opencompass.utils.text_postprocessors import first_option_postprocess, match_answer_pattern + +QUERY_TEMPLATE = """ +Answer the following multiple choice question. The last line of your response should be of the following format: 'ANSWER: $LETTER' (without quotes) where LETTER is one of ABCD. Think step by step before answering. + +{question} + +A. {textA} +B. {textB} +C. {textC} +D. {textD} +""".strip() + +ARC_c_reader_cfg = dict( + input_columns=['question', 'textA', 'textB', 'textC', 'textD'], + output_column='answerKey') + +ARC_c_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template=dict( + round=[ + dict( + role='HUMAN', + prompt=QUERY_TEMPLATE) + ], ), + ), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer), +) + +ARC_c_eval_cfg = dict( + evaluator=dict(type=AccEvaluator), + pred_role='BOT', + pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'), +) + +ARC_c_datasets = [ + dict( + abbr='ARC-c', + type=ARCDataset, + path='opencompass/ai2_arc-dev', + name='ARC-Challenge', + reader_cfg=ARC_c_reader_cfg, + infer_cfg=ARC_c_infer_cfg, + eval_cfg=ARC_c_eval_cfg, + ) +] diff --git a/configs/datasets/ARC_c/ARC_c_few_shot_gen_e9b043.py b/configs/datasets/ARC_c/ARC_c_few_shot_gen_e9b043.py new file mode 100644 index 00000000..25193f04 --- /dev/null +++ b/configs/datasets/ARC_c/ARC_c_few_shot_gen_e9b043.py @@ -0,0 +1,48 @@ +from opencompass.openicl.icl_prompt_template import 
PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import ARCDataset +from opencompass.utils.text_postprocessors import first_capital_postprocess + +ARC_c_reader_cfg = dict( + input_columns=['question', 'textA', 'textB', 'textC', 'textD'], + output_column='answerKey', +) + +ARC_c_infer_cfg = dict( + ice_template=dict( + type=PromptTemplate, + template=dict( + begin='', + round=[ + dict( + role='HUMAN', + prompt='Question: {question}\nA. {textA}\nB. {textB}\nC. {textC}\nD. {textD}\nAnswer:', + ), + dict(role='BOT', prompt='{answerKey}'), + ], + ), + ice_token='', + ), + retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]), + inferencer=dict(type=GenInferencer, max_out_len=50), +) + +ARC_c_eval_cfg = dict( + evaluator=dict(type=AccEvaluator), + pred_role='BOT', + pred_postprocessor=dict(type=first_capital_postprocess), +) + +ARC_c_datasets = [ + dict( + abbr='ARC-c', + type=ARCDataset, + path='opencompass/ai2_arc-dev', + name='ARC-Challenge', + reader_cfg=ARC_c_reader_cfg, + infer_cfg=ARC_c_infer_cfg, + eval_cfg=ARC_c_eval_cfg, + ) +] diff --git a/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_cot_gen_1d56df.py b/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_cot_gen_1d56df.py new file mode 100644 index 00000000..3ab8320b --- /dev/null +++ b/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_cot_gen_1d56df.py @@ -0,0 +1,55 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import BoolQDatasetV2 +from opencompass.utils.text_postprocessors import ( + first_option_postprocess, +) + +QUERY_TEMPLATE = """ +Answer the following question. 
The last line of your response should be of the following format: 'ANSWER: $LETTER' (without quotes) where LETTER is one of AB. Think step by step before answering. + +Passage: {passage} + +Question: {question} + +A. Yes +B. No + +""".strip() + +BoolQ_reader_cfg = dict( + input_columns=['question', 'passage'], + output_column='label', +) + +BoolQ_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template=dict( + round=[ + dict(role='HUMAN', prompt=QUERY_TEMPLATE), + ] + ), + ), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer), +) + +BoolQ_eval_cfg = dict( + evaluator=dict(type=AccEvaluator), + pred_role='BOT', + pred_postprocessor=dict(type=first_option_postprocess, options='AB'), +) + +BoolQ_datasets = [ + dict( + abbr='BoolQ', + type=BoolQDatasetV2, + path='opencompass/boolq', + reader_cfg=BoolQ_reader_cfg, + infer_cfg=BoolQ_infer_cfg, + eval_cfg=BoolQ_eval_cfg, + ) +] diff --git a/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_few_shot_gen_ba58ea.py b/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_few_shot_gen_ba58ea.py new file mode 100644 index 00000000..188b774a --- /dev/null +++ b/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_few_shot_gen_ba58ea.py @@ -0,0 +1,47 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import BoolQDatasetV2 +from opencompass.utils.text_postprocessors import first_capital_postprocess + +BoolQ_reader_cfg = dict( + input_columns=['question', 'passage'], + output_column='label', +) + +BoolQ_infer_cfg = dict( + ice_template=dict( + type=PromptTemplate, + template=dict( + begin='', + round=[ + dict( + role='HUMAN', + prompt='{passage}\nQuestion: {question}\nA. Yes\nB. 
No\nAnswer:', + ), + dict(role='BOT', prompt='{label}'), + ], + ), + ice_token='', + ), + retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]), + inferencer=dict(type=GenInferencer, max_out_len=50), +) + +BoolQ_eval_cfg = dict( + evaluator=dict(type=AccEvaluator), + pred_role='BOT', + pred_postprocessor=dict(type=first_capital_postprocess), +) + +BoolQ_datasets = [ + dict( + abbr='BoolQ', + type=BoolQDatasetV2, + path='opencompass/boolq', + reader_cfg=BoolQ_reader_cfg, + infer_cfg=BoolQ_infer_cfg, + eval_cfg=BoolQ_eval_cfg, + ) +] diff --git a/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_gen_883d50.py b/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_gen_883d50.py index 01301860..78f368bd 100644 --- a/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_gen_883d50.py +++ b/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_gen_883d50.py @@ -33,7 +33,7 @@ BoolQ_datasets = [ dict( abbr='BoolQ', type=BoolQDatasetV2, - path='./data/SuperGLUE/BoolQ/val.jsonl', + path='opencompass/boolq', reader_cfg=BoolQ_reader_cfg, infer_cfg=BoolQ_infer_cfg, eval_cfg=BoolQ_eval_cfg, diff --git a/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_16b1d9.py b/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_16b1d9.py new file mode 100644 index 00000000..e24ea1e6 --- /dev/null +++ b/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_16b1d9.py @@ -0,0 +1,43 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import PPLInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import BoolQDatasetV2 + +BoolQ_reader_cfg = dict( + input_columns=['question', 'passage'], + output_column='label', +) + +BoolQ_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template={ + 'A': + dict(round=[ + dict(role='HUMAN', prompt='{passage}\nQuestion: {question}?'), + dict(role='BOT', prompt='Yes'), + ]), + 'B': + 
dict(round=[ + dict(role='HUMAN', prompt='{passage}\nQuestion: {question}?'), + dict(role='BOT', prompt='No'), + ]), + }, + ), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=PPLInferencer), +) + +BoolQ_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) + +BoolQ_datasets = [ + dict( + abbr='BoolQ', + type=BoolQDatasetV2, + path='opencompass/boolq', + reader_cfg=BoolQ_reader_cfg, + infer_cfg=BoolQ_infer_cfg, + eval_cfg=BoolQ_eval_cfg, + ) +] diff --git a/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_314797.py b/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_314797.py index cb0980ce..9e9c2ff0 100644 --- a/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_314797.py +++ b/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_314797.py @@ -35,7 +35,7 @@ BoolQ_datasets = [ dict( abbr='BoolQ', type=BoolQDatasetV3, - path='./data/SuperGLUE/BoolQ/val.jsonl', + path='opencompass/boolq', reader_cfg=BoolQ_reader_cfg, infer_cfg=BoolQ_infer_cfg, eval_cfg=BoolQ_eval_cfg, diff --git a/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_314b96.py b/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_314b96.py index dc2c8765..0d610ba5 100644 --- a/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_314b96.py +++ b/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_314b96.py @@ -36,7 +36,7 @@ BoolQ_datasets = [ type=BoolQDataset, abbr='BoolQ', path='json', - data_files='./data/SuperGLUE/BoolQ/val.jsonl', + data_files='opencompass/boolq', split='train', reader_cfg=BoolQ_reader_cfg, infer_cfg=BoolQ_infer_cfg, diff --git a/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_4da4db.py b/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_4da4db.py index 5343bc40..9af5893b 100644 --- a/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_4da4db.py +++ b/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_4da4db.py @@ -36,7 +36,7 @@ BoolQ_datasets = [ type=BoolQDataset, abbr='BoolQ', path='json', - data_files='./data/SuperGLUE/BoolQ/val.jsonl', + 
data_files='opencompass/boolq', split='train', reader_cfg=BoolQ_reader_cfg, infer_cfg=BoolQ_infer_cfg, diff --git a/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_9619db.py b/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_9619db.py index 312a3705..47dfb6cb 100644 --- a/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_9619db.py +++ b/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_9619db.py @@ -26,7 +26,7 @@ BoolQ_datasets = [ type=BoolQDataset, abbr='BoolQ', path='json', - data_files='./data/SuperGLUE/BoolQ/val.jsonl', + data_files='opencompass/boolq', split='train', reader_cfg=BoolQ_reader_cfg, infer_cfg=BoolQ_infer_cfg, diff --git a/configs/datasets/race/race_cot_gen_d95929.py b/configs/datasets/race/race_cot_gen_d95929.py new file mode 100644 index 00000000..e92c55e8 --- /dev/null +++ b/configs/datasets/race/race_cot_gen_d95929.py @@ -0,0 +1,68 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import RaceDataset +from opencompass.utils.text_postprocessors import ( + first_option_postprocess, +) + +QUERY_TEMPLATE = """ +Answer the following multiple choice question. The last line of your response should be of the following format: 'ANSWER: $LETTER' (without quotes) where LETTER is one of ABCD. Think step by step before answering. + +Article: {article} + +Q: {question} + +A. {A} +B. {B} +C. {C} +D. 
{D} +""".strip() + +race_reader_cfg = dict( + input_columns=['article', 'question', 'A', 'B', 'C', 'D'], + output_column='answer', + train_split='validation', + test_split='test', +) + +race_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template=dict( + round=[ + dict(role='HUMAN', prompt=QUERY_TEMPLATE), + ] + ), + ), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer), +) + +race_eval_cfg = dict( + evaluator=dict(type=AccEvaluator), + pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'), + pred_role='BOT', +) + +race_datasets = [ + dict( + abbr='race-middle', + type=RaceDataset, + path='opencompass/race', + name='middle', + reader_cfg=race_reader_cfg, + infer_cfg=race_infer_cfg, + eval_cfg=race_eval_cfg, + ), + dict( + abbr='race-high', + type=RaceDataset, + path='opencompass/race', + name='high', + reader_cfg=race_reader_cfg, + infer_cfg=race_infer_cfg, + eval_cfg=race_eval_cfg, + ), +] diff --git a/configs/datasets/race/race_few_shot_gen_a498ed.py b/configs/datasets/race/race_few_shot_gen_a498ed.py new file mode 100644 index 00000000..f33479ef --- /dev/null +++ b/configs/datasets/race/race_few_shot_gen_a498ed.py @@ -0,0 +1,53 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import RaceDataset +from opencompass.utils.text_postprocessors import first_capital_postprocess + +race_reader_cfg = dict( + input_columns=['article', 'question', 'A', 'B', 'C', 'D'], + output_column='answer', + train_split='validation', + test_split='test' +) + +race_infer_cfg = dict( + ice_template=dict( + type=PromptTemplate, + template=dict( + begin='', + round=[ + dict(role='HUMAN', prompt='Read the article, and answer the question by replying A, B, C or 
D.\n\nArticle:\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer:'), + dict(role='BOT', prompt='{answer}'), + ] + ), + ice_token='', + ), + retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4]), + inferencer=dict(type=GenInferencer, max_out_len=50), +) + +race_eval_cfg = dict( + evaluator=dict(type=AccEvaluator), + pred_postprocessor=dict(type=first_capital_postprocess), + pred_role='BOT') + +race_datasets = [ + dict( + abbr='race-middle', + type=RaceDataset, + path='opencompass/race', + name='middle', + reader_cfg=race_reader_cfg, + infer_cfg=race_infer_cfg, + eval_cfg=race_eval_cfg), + dict( + abbr='race-high', + type=RaceDataset, + path='opencompass/race', + name='high', + reader_cfg=race_reader_cfg, + infer_cfg=race_infer_cfg, + eval_cfg=race_eval_cfg) +] diff --git a/configs/models/chatglm/lmdeploy_glm4_9b_chat.py b/configs/models/chatglm/lmdeploy_glm4_9b_chat.py new file mode 100644 index 00000000..2f8218a6 --- /dev/null +++ b/configs/models/chatglm/lmdeploy_glm4_9b_chat.py @@ -0,0 +1,15 @@ +from opencompass.models import TurboMindModelwithChatTemplate + +models = [ + dict( + type=TurboMindModelwithChatTemplate, + abbr='glm-4-9b-chat-turbomind', + path='THUDM/glm-4-9b-chat', + engine_config=dict(max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), + max_seq_len=8192, + max_out_len=1024, + batch_size=16, + run_cfg=dict(num_gpus=1), + ) +] diff --git a/configs/models/hf_llama/lmdeploy_llama3_1_8b.py b/configs/models/hf_llama/lmdeploy_llama3_1_8b.py new file mode 100644 index 00000000..a0b3cc5e --- /dev/null +++ b/configs/models/hf_llama/lmdeploy_llama3_1_8b.py @@ -0,0 +1,15 @@ +from opencompass.models import TurboMindModel + +models = [ + dict( + type=TurboMindModel, + abbr='llama-3.1-8b-turbomind', + path='meta-llama/Meta-Llama-3.1-8B', + engine_config=dict(session_len=7168, max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), + 
max_seq_len=7168, + max_out_len=1024, + batch_size=16, + run_cfg=dict(num_gpus=1), + ) +] diff --git a/configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py b/configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py new file mode 100644 index 00000000..b7dedb72 --- /dev/null +++ b/configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py @@ -0,0 +1,16 @@ +from opencompass.models import TurboMindModelwithChatTemplate + +models = [ + dict( + type=TurboMindModelwithChatTemplate, + abbr='llama-3.1-8b-instruct-turbomind', + path='meta-llama/Meta-Llama-3.1-8B-Instruct', + engine_config=dict(max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), + max_seq_len=7168, + max_out_len=1024, + batch_size=16, + run_cfg=dict(num_gpus=1), + stop_words=['<|end_of_text|>', '<|eot_id|>'], + ) +] diff --git a/opencompass/configs/datasets/ARC_c/ARC_c_cot_gen_926652.py b/opencompass/configs/datasets/ARC_c/ARC_c_cot_gen_926652.py new file mode 100644 index 00000000..5bbb25c9 --- /dev/null +++ b/opencompass/configs/datasets/ARC_c/ARC_c_cot_gen_926652.py @@ -0,0 +1,53 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import ARCDataset +from opencompass.utils.text_postprocessors import first_option_postprocess, match_answer_pattern + +QUERY_TEMPLATE = """ +Answer the following multiple choice question. The last line of your response should be of the following format: 'ANSWER: $LETTER' (without quotes) where LETTER is one of ABCD. Think step by step before answering. + +{question} + +A. {textA} +B. {textB} +C. {textC} +D. 
{textD} +""".strip() + +ARC_c_reader_cfg = dict( + input_columns=['question', 'textA', 'textB', 'textC', 'textD'], + output_column='answerKey') + +ARC_c_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template=dict( + round=[ + dict( + role='HUMAN', + prompt=QUERY_TEMPLATE) + ], ), + ), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer), +) + +ARC_c_eval_cfg = dict( + evaluator=dict(type=AccEvaluator), + pred_role='BOT', + pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'), +) + +ARC_c_datasets = [ + dict( + abbr='ARC-c', + type=ARCDataset, + path='opencompass/ai2_arc-dev', + name='ARC-Challenge', + reader_cfg=ARC_c_reader_cfg, + infer_cfg=ARC_c_infer_cfg, + eval_cfg=ARC_c_eval_cfg, + ) +] diff --git a/opencompass/configs/datasets/ARC_c/ARC_c_few_shot_gen_e9b043.py b/opencompass/configs/datasets/ARC_c/ARC_c_few_shot_gen_e9b043.py new file mode 100644 index 00000000..25193f04 --- /dev/null +++ b/opencompass/configs/datasets/ARC_c/ARC_c_few_shot_gen_e9b043.py @@ -0,0 +1,48 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import ARCDataset +from opencompass.utils.text_postprocessors import first_capital_postprocess + +ARC_c_reader_cfg = dict( + input_columns=['question', 'textA', 'textB', 'textC', 'textD'], + output_column='answerKey', +) + +ARC_c_infer_cfg = dict( + ice_template=dict( + type=PromptTemplate, + template=dict( + begin='', + round=[ + dict( + role='HUMAN', + prompt='Question: {question}\nA. {textA}\nB. {textB}\nC. {textC}\nD. 
{textD}\nAnswer:', + ), + dict(role='BOT', prompt='{answerKey}'), + ], + ), + ice_token='', + ), + retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]), + inferencer=dict(type=GenInferencer, max_out_len=50), +) + +ARC_c_eval_cfg = dict( + evaluator=dict(type=AccEvaluator), + pred_role='BOT', + pred_postprocessor=dict(type=first_capital_postprocess), +) + +ARC_c_datasets = [ + dict( + abbr='ARC-c', + type=ARCDataset, + path='opencompass/ai2_arc-dev', + name='ARC-Challenge', + reader_cfg=ARC_c_reader_cfg, + infer_cfg=ARC_c_infer_cfg, + eval_cfg=ARC_c_eval_cfg, + ) +] diff --git a/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_cot_gen_1d56df.py b/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_cot_gen_1d56df.py new file mode 100644 index 00000000..3ab8320b --- /dev/null +++ b/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_cot_gen_1d56df.py @@ -0,0 +1,55 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import BoolQDatasetV2 +from opencompass.utils.text_postprocessors import ( + first_option_postprocess, +) + +QUERY_TEMPLATE = """ +Answer the following question. The last line of your response should be of the following format: 'ANSWER: $LETTER' (without quotes) where LETTER is one of AB. Think step by step before answering. + +Passage: {passage} + +Question: {question} + +A. Yes +B. 
No + +""".strip() + +BoolQ_reader_cfg = dict( + input_columns=['question', 'passage'], + output_column='label', +) + +BoolQ_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template=dict( + round=[ + dict(role='HUMAN', prompt=QUERY_TEMPLATE), + ] + ), + ), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer), +) + +BoolQ_eval_cfg = dict( + evaluator=dict(type=AccEvaluator), + pred_role='BOT', + pred_postprocessor=dict(type=first_option_postprocess, options='AB'), +) + +BoolQ_datasets = [ + dict( + abbr='BoolQ', + type=BoolQDatasetV2, + path='opencompass/boolq', + reader_cfg=BoolQ_reader_cfg, + infer_cfg=BoolQ_infer_cfg, + eval_cfg=BoolQ_eval_cfg, + ) +] diff --git a/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_few_shot_gen_ba58ea.py b/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_few_shot_gen_ba58ea.py new file mode 100644 index 00000000..188b774a --- /dev/null +++ b/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_few_shot_gen_ba58ea.py @@ -0,0 +1,47 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import BoolQDatasetV2 +from opencompass.utils.text_postprocessors import first_capital_postprocess + +BoolQ_reader_cfg = dict( + input_columns=['question', 'passage'], + output_column='label', +) + +BoolQ_infer_cfg = dict( + ice_template=dict( + type=PromptTemplate, + template=dict( + begin='', + round=[ + dict( + role='HUMAN', + prompt='{passage}\nQuestion: {question}\nA. Yes\nB. 
No\nAnswer:', + ), + dict(role='BOT', prompt='{label}'), + ], + ), + ice_token='', + ), + retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]), + inferencer=dict(type=GenInferencer, max_out_len=50), +) + +BoolQ_eval_cfg = dict( + evaluator=dict(type=AccEvaluator), + pred_role='BOT', + pred_postprocessor=dict(type=first_capital_postprocess), +) + +BoolQ_datasets = [ + dict( + abbr='BoolQ', + type=BoolQDatasetV2, + path='opencompass/boolq', + reader_cfg=BoolQ_reader_cfg, + infer_cfg=BoolQ_infer_cfg, + eval_cfg=BoolQ_eval_cfg, + ) +] diff --git a/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_gen_883d50.py b/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_gen_883d50.py index 01301860..78f368bd 100644 --- a/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_gen_883d50.py +++ b/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_gen_883d50.py @@ -33,7 +33,7 @@ BoolQ_datasets = [ dict( abbr='BoolQ', type=BoolQDatasetV2, - path='./data/SuperGLUE/BoolQ/val.jsonl', + path='opencompass/boolq', reader_cfg=BoolQ_reader_cfg, infer_cfg=BoolQ_infer_cfg, eval_cfg=BoolQ_eval_cfg, diff --git a/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_16b1d9.py b/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_16b1d9.py new file mode 100644 index 00000000..e24ea1e6 --- /dev/null +++ b/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_16b1d9.py @@ -0,0 +1,43 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import PPLInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import BoolQDatasetV2 + +BoolQ_reader_cfg = dict( + input_columns=['question', 'passage'], + output_column='label', +) + +BoolQ_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template={ + 'A': + dict(round=[ + dict(role='HUMAN', 
prompt='{passage}\nQuestion: {question}?'), + dict(role='BOT', prompt='Yes'), + ]), + 'B': + dict(round=[ + dict(role='HUMAN', prompt='{passage}\nQuestion: {question}?'), + dict(role='BOT', prompt='No'), + ]), + }, + ), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=PPLInferencer), +) + +BoolQ_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) + +BoolQ_datasets = [ + dict( + abbr='BoolQ', + type=BoolQDatasetV2, + path='opencompass/boolq', + reader_cfg=BoolQ_reader_cfg, + infer_cfg=BoolQ_infer_cfg, + eval_cfg=BoolQ_eval_cfg, + ) +] diff --git a/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_314797.py b/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_314797.py index cb0980ce..9e9c2ff0 100644 --- a/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_314797.py +++ b/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_314797.py @@ -35,7 +35,7 @@ BoolQ_datasets = [ dict( abbr='BoolQ', type=BoolQDatasetV3, - path='./data/SuperGLUE/BoolQ/val.jsonl', + path='opencompass/boolq', reader_cfg=BoolQ_reader_cfg, infer_cfg=BoolQ_infer_cfg, eval_cfg=BoolQ_eval_cfg, diff --git a/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_314b96.py b/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_314b96.py index dc2c8765..0d610ba5 100644 --- a/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_314b96.py +++ b/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_314b96.py @@ -36,7 +36,7 @@ BoolQ_datasets = [ type=BoolQDataset, abbr='BoolQ', path='json', - data_files='./data/SuperGLUE/BoolQ/val.jsonl', + data_files='opencompass/boolq', split='train', reader_cfg=BoolQ_reader_cfg, infer_cfg=BoolQ_infer_cfg, diff --git a/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_4da4db.py b/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_4da4db.py index 5343bc40..9af5893b 100644 --- 
a/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_4da4db.py +++ b/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_4da4db.py @@ -36,7 +36,7 @@ BoolQ_datasets = [ type=BoolQDataset, abbr='BoolQ', path='json', - data_files='./data/SuperGLUE/BoolQ/val.jsonl', + data_files='opencompass/boolq', split='train', reader_cfg=BoolQ_reader_cfg, infer_cfg=BoolQ_infer_cfg, diff --git a/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_9619db.py b/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_9619db.py index 312a3705..47dfb6cb 100644 --- a/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_9619db.py +++ b/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_9619db.py @@ -26,7 +26,7 @@ BoolQ_datasets = [ type=BoolQDataset, abbr='BoolQ', path='json', - data_files='./data/SuperGLUE/BoolQ/val.jsonl', + data_files='opencompass/boolq', split='train', reader_cfg=BoolQ_reader_cfg, infer_cfg=BoolQ_infer_cfg, diff --git a/opencompass/configs/datasets/race/race_cot_gen_d95929.py b/opencompass/configs/datasets/race/race_cot_gen_d95929.py new file mode 100644 index 00000000..e92c55e8 --- /dev/null +++ b/opencompass/configs/datasets/race/race_cot_gen_d95929.py @@ -0,0 +1,68 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import RaceDataset +from opencompass.utils.text_postprocessors import ( + first_option_postprocess, +) + +QUERY_TEMPLATE = """ +Answer the following multiple choice question. The last line of your response should be of the following format: 'ANSWER: $LETTER' (without quotes) where LETTER is one of ABCD. Think step by step before answering. + +Article: {article} + +Q: {question} + +A. {A} +B. {B} +C. {C} +D. 
{D} +""".strip() + +race_reader_cfg = dict( + input_columns=['article', 'question', 'A', 'B', 'C', 'D'], + output_column='answer', + train_split='validation', + test_split='test', +) + +race_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template=dict( + round=[ + dict(role='HUMAN', prompt=QUERY_TEMPLATE), + ] + ), + ), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer), +) + +race_eval_cfg = dict( + evaluator=dict(type=AccEvaluator), + pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'), + pred_role='BOT', +) + +race_datasets = [ + dict( + abbr='race-middle', + type=RaceDataset, + path='opencompass/race', + name='middle', + reader_cfg=race_reader_cfg, + infer_cfg=race_infer_cfg, + eval_cfg=race_eval_cfg, + ), + dict( + abbr='race-high', + type=RaceDataset, + path='opencompass/race', + name='high', + reader_cfg=race_reader_cfg, + infer_cfg=race_infer_cfg, + eval_cfg=race_eval_cfg, + ), +] diff --git a/opencompass/configs/datasets/race/race_few_shot_gen_a498ed.py b/opencompass/configs/datasets/race/race_few_shot_gen_a498ed.py new file mode 100644 index 00000000..f33479ef --- /dev/null +++ b/opencompass/configs/datasets/race/race_few_shot_gen_a498ed.py @@ -0,0 +1,53 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import RaceDataset +from opencompass.utils.text_postprocessors import first_capital_postprocess + +race_reader_cfg = dict( + input_columns=['article', 'question', 'A', 'B', 'C', 'D'], + output_column='answer', + train_split='validation', + test_split='test' +) + +race_infer_cfg = dict( + ice_template=dict( + type=PromptTemplate, + template=dict( + begin='', + round=[ + dict(role='HUMAN', prompt='Read the article, and answer the question by replying A, 
B, C or D.\n\nArticle:\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer:'), + dict(role='BOT', prompt='{answer}'), + ] + ), + ice_token='', + ), + retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4]), + inferencer=dict(type=GenInferencer, max_out_len=50), +) + +race_eval_cfg = dict( + evaluator=dict(type=AccEvaluator), + pred_postprocessor=dict(type=first_capital_postprocess), + pred_role='BOT') + +race_datasets = [ + dict( + abbr='race-middle', + type=RaceDataset, + path='opencompass/race', + name='middle', + reader_cfg=race_reader_cfg, + infer_cfg=race_infer_cfg, + eval_cfg=race_eval_cfg), + dict( + abbr='race-high', + type=RaceDataset, + path='opencompass/race', + name='high', + reader_cfg=race_reader_cfg, + infer_cfg=race_infer_cfg, + eval_cfg=race_eval_cfg) +] diff --git a/opencompass/configs/models/chatglm/lmdeploy_glm4_9b_chat.py b/opencompass/configs/models/chatglm/lmdeploy_glm4_9b_chat.py new file mode 100644 index 00000000..2f8218a6 --- /dev/null +++ b/opencompass/configs/models/chatglm/lmdeploy_glm4_9b_chat.py @@ -0,0 +1,15 @@ +from opencompass.models import TurboMindModelwithChatTemplate + +models = [ + dict( + type=TurboMindModelwithChatTemplate, + abbr='glm-4-9b-chat-turbomind', + path='THUDM/glm-4-9b-chat', + engine_config=dict(max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), + max_seq_len=8192, + max_out_len=1024, + batch_size=16, + run_cfg=dict(num_gpus=1), + ) +] diff --git a/opencompass/configs/models/hf_llama/lmdeploy_llama3_1_8b.py b/opencompass/configs/models/hf_llama/lmdeploy_llama3_1_8b.py new file mode 100644 index 00000000..a0b3cc5e --- /dev/null +++ b/opencompass/configs/models/hf_llama/lmdeploy_llama3_1_8b.py @@ -0,0 +1,15 @@ +from opencompass.models import TurboMindModel + +models = [ + dict( + type=TurboMindModel, + abbr='llama-3.1-8b-turbomind', + path='meta-llama/Meta-Llama-3.1-8B', + engine_config=dict(session_len=7168, max_batch_size=16, tp=1), + 
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), + max_seq_len=7168, + max_out_len=1024, + batch_size=16, + run_cfg=dict(num_gpus=1), + ) +] diff --git a/opencompass/configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py b/opencompass/configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py new file mode 100644 index 00000000..b7dedb72 --- /dev/null +++ b/opencompass/configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py @@ -0,0 +1,16 @@ +from opencompass.models import TurboMindModelwithChatTemplate + +models = [ + dict( + type=TurboMindModelwithChatTemplate, + abbr='llama-3.1-8b-instruct-turbomind', + path='meta-llama/Meta-Llama-3.1-8B-Instruct', + engine_config=dict(max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), + max_seq_len=7168, + max_out_len=1024, + batch_size=16, + run_cfg=dict(num_gpus=1), + stop_words=['<|end_of_text|>', '<|eot_id|>'], + ) +] diff --git a/opencompass/datasets/boolq.py b/opencompass/datasets/boolq.py index 318b13da..634ab1d5 100644 --- a/opencompass/datasets/boolq.py +++ b/opencompass/datasets/boolq.py @@ -31,7 +31,7 @@ class BoolQDatasetV2(BaseDataset): @staticmethod def load(path): - path = get_data_path(path, local_mode=True) + path = get_data_path(path) dataset = [] with open(path, 'r') as f: for line in f: diff --git a/opencompass/openicl/icl_retriever/__init__.py b/opencompass/openicl/icl_retriever/__init__.py index 1f39b3f8..b48cdd8f 100644 --- a/opencompass/openicl/icl_retriever/__init__.py +++ b/opencompass/openicl/icl_retriever/__init__.py @@ -4,7 +4,7 @@ from .icl_dpp_retriever import DPPRetriever # noqa from .icl_fix_k_retriever import FixKRetriever # noqa from .icl_mdl_retriever import MDLRetriever # noqa from .icl_random_retriever import RandomRetriever # noqa +from .icl_sliding_k_retriever import SlidingWindowRetriever # noqa from .icl_topk_retriever import TopkRetriever # noqa from .icl_votek_retriever import VotekRetriever # noqa from 
.icl_zero_retriever import ZeroRetriever # noqa -from .icl_sliding_k_retriever import SlidingWindowRetriever # noqa diff --git a/opencompass/openicl/icl_retriever/icl_sliding_k_retriever.py b/opencompass/openicl/icl_retriever/icl_sliding_k_retriever.py index f9cfe8f0..141b94bd 100644 --- a/opencompass/openicl/icl_retriever/icl_sliding_k_retriever.py +++ b/opencompass/openicl/icl_retriever/icl_sliding_k_retriever.py @@ -51,8 +51,8 @@ class SlidingWindowRetriever(BaseRetriever): for current_index in trange(len(self.test_ds), disable=not self.is_main_process): if current_index < self.k: - """For the first few examples, - get the previous ones and pad with the last ones""" + """For the first few examples, get the previous ones and pad + with the last ones.""" start_index = max(0, current_index - self.k) previous_shots = list(range(start_index, current_index)) if len(previous_shots) < self.k: diff --git a/opencompass/utils/datasets_info.py b/opencompass/utils/datasets_info.py index f27fed08..f097c5c3 100644 --- a/opencompass/utils/datasets_info.py +++ b/opencompass/utils/datasets_info.py @@ -203,7 +203,7 @@ DATASETS_MAPPING = { "opencompass/race": { "ms_id": "opencompass/race", "hf_id": "opencompass/race", - "local": "./data/race", + "local": "./data/race/", }, # SIQA "opencompass/siqa": { @@ -229,6 +229,12 @@ DATASETS_MAPPING = { "hf_id": "opencompass/summedits", "local": "./data/summedits/summedits.jsonl", }, + # SuperGLUE + "opencompass/boolq": { + "ms_id": "opencompass/boolq", + "hf_id": "opencompass/boolq", + "local": "./data/SuperGLUE/BoolQ/val.jsonl", + }, # TriviaQA "opencompass/trivia_qa": { "ms_id": "opencompass/trivia_qa", @@ -292,10 +298,6 @@ DATASETS_URL = { "url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/BBH.zip", "md5": "60c49f9bef5148aa7e1941328e96a554", }, - "/mmlu/": { - "url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/mmlu.zip", - "md5": "761310671509a239e41c4b717f7fab9c", - }, "/compass_arena/": { "url": 
"http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/compass_arena.zip", "md5": "cd59b54a179d16f2a858b359b60588f6", @@ -367,5 +369,17 @@ DATASETS_URL = { "FewCLUE": { "url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/FewCLUE.zip", "md5": "7976e2bb0e9d885ffd3c55f7c5d4021e", - } + }, + "/race": { + "url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/race.zip", + "md5": "b758251764a264746cf45749c02363f9", + }, + "/ARC": { + "url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/ARC.zip", + "md5": "d720629b69f1a51cfe78bf65b00b44f6", + }, + "/SuperGLUE": { + "url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/SuperGLUE.zip", + "md5": "b60904915b0b61d1a04ea52280169936", + }, } diff --git a/opencompass/utils/text_postprocessors.py b/opencompass/utils/text_postprocessors.py index fc12e064..aeb4a0e5 100644 --- a/opencompass/utils/text_postprocessors.py +++ b/opencompass/utils/text_postprocessors.py @@ -98,6 +98,7 @@ def first_option_postprocess(text: str, options: str, cushion=True) -> str: f'答案是\s?(\S+)(?:。|$)', f'答案应该是\s?(\S+)(?:。|$)', f'答案为\s?(\S+)(?:。|$)', + f'(?i)ANSWER\s*:\s*([{options}])', f'[Tt]he answer is:?\s+\(?([{options}])\)?', f'[Tt]he answer is option:?\s+\(?([{options}])\)?', f'[Tt]he correct answer is:?\s+\(?([{options}])\)?',