mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Feature] Fullbench v0.1 language update (#1463)
* update * update * update * update
This commit is contained in:
parent
463231c651
commit
245664f4c0
53
configs/datasets/ARC_c/ARC_c_cot_gen_926652.py
Normal file
53
configs/datasets/ARC_c/ARC_c_cot_gen_926652.py
Normal file
@ -0,0 +1,53 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import ARCDataset
|
||||
from opencompass.utils.text_postprocessors import first_option_postprocess, match_answer_pattern
|
||||
|
||||
QUERY_TEMPLATE = """
|
||||
Answer the following multiple choice question. The last line of your response should be of the following format: 'ANSWER: $LETTER' (without quotes) where LETTER is one of ABCD. Think step by step before answering.
|
||||
|
||||
{question}
|
||||
|
||||
A. {textA}
|
||||
B. {textB}
|
||||
C. {textC}
|
||||
D. {textD}
|
||||
""".strip()
|
||||
|
||||
ARC_c_reader_cfg = dict(
|
||||
input_columns=['question', 'textA', 'textB', 'textC', 'textD'],
|
||||
output_column='answerKey')
|
||||
|
||||
ARC_c_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=dict(
|
||||
round=[
|
||||
dict(
|
||||
role='HUMAN',
|
||||
prompt=QUERY_TEMPLATE)
|
||||
], ),
|
||||
),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer),
|
||||
)
|
||||
|
||||
ARC_c_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role='BOT',
|
||||
pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'),
|
||||
)
|
||||
|
||||
ARC_c_datasets = [
|
||||
dict(
|
||||
abbr='ARC-c',
|
||||
type=ARCDataset,
|
||||
path='opencompass/ai2_arc-dev',
|
||||
name='ARC-Challenge',
|
||||
reader_cfg=ARC_c_reader_cfg,
|
||||
infer_cfg=ARC_c_infer_cfg,
|
||||
eval_cfg=ARC_c_eval_cfg,
|
||||
)
|
||||
]
|
48
configs/datasets/ARC_c/ARC_c_few_shot_gen_e9b043.py
Normal file
48
configs/datasets/ARC_c/ARC_c_few_shot_gen_e9b043.py
Normal file
@ -0,0 +1,48 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import ARCDataset
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
ARC_c_reader_cfg = dict(
|
||||
input_columns=['question', 'textA', 'textB', 'textC', 'textD'],
|
||||
output_column='answerKey',
|
||||
)
|
||||
|
||||
ARC_c_infer_cfg = dict(
|
||||
ice_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=dict(
|
||||
begin='</E>',
|
||||
round=[
|
||||
dict(
|
||||
role='HUMAN',
|
||||
prompt='Question: {question}\nA. {textA}\nB. {textB}\nC. {textC}\nD. {textD}\nAnswer:',
|
||||
),
|
||||
dict(role='BOT', prompt='{answerKey}'),
|
||||
],
|
||||
),
|
||||
ice_token='</E>',
|
||||
),
|
||||
retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]),
|
||||
inferencer=dict(type=GenInferencer, max_out_len=50),
|
||||
)
|
||||
|
||||
ARC_c_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role='BOT',
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
ARC_c_datasets = [
|
||||
dict(
|
||||
abbr='ARC-c',
|
||||
type=ARCDataset,
|
||||
path='opencompass/ai2_arc-dev',
|
||||
name='ARC-Challenge',
|
||||
reader_cfg=ARC_c_reader_cfg,
|
||||
infer_cfg=ARC_c_infer_cfg,
|
||||
eval_cfg=ARC_c_eval_cfg,
|
||||
)
|
||||
]
|
@ -0,0 +1,55 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import BoolQDatasetV2
|
||||
from opencompass.utils.text_postprocessors import (
|
||||
first_option_postprocess,
|
||||
)
|
||||
|
||||
QUERY_TEMPLATE = """
|
||||
Answer the following question. The last line of your response should be of the following format: 'ANSWER: $LETTER' (without quotes) where LETTER is one of AB. Think step by step before answering.
|
||||
|
||||
Passage: {passage}
|
||||
|
||||
Question: {question}
|
||||
|
||||
A. Yes
|
||||
B. No
|
||||
|
||||
""".strip()
|
||||
|
||||
BoolQ_reader_cfg = dict(
|
||||
input_columns=['question', 'passage'],
|
||||
output_column='label',
|
||||
)
|
||||
|
||||
BoolQ_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=dict(
|
||||
round=[
|
||||
dict(role='HUMAN', prompt=QUERY_TEMPLATE),
|
||||
]
|
||||
),
|
||||
),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer),
|
||||
)
|
||||
|
||||
BoolQ_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role='BOT',
|
||||
pred_postprocessor=dict(type=first_option_postprocess, options='AB'),
|
||||
)
|
||||
|
||||
BoolQ_datasets = [
|
||||
dict(
|
||||
abbr='BoolQ',
|
||||
type=BoolQDatasetV2,
|
||||
path='opencompass/boolq',
|
||||
reader_cfg=BoolQ_reader_cfg,
|
||||
infer_cfg=BoolQ_infer_cfg,
|
||||
eval_cfg=BoolQ_eval_cfg,
|
||||
)
|
||||
]
|
@ -0,0 +1,47 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import BoolQDatasetV2
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
BoolQ_reader_cfg = dict(
|
||||
input_columns=['question', 'passage'],
|
||||
output_column='label',
|
||||
)
|
||||
|
||||
BoolQ_infer_cfg = dict(
|
||||
ice_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=dict(
|
||||
begin='</E>',
|
||||
round=[
|
||||
dict(
|
||||
role='HUMAN',
|
||||
prompt='{passage}\nQuestion: {question}\nA. Yes\nB. No\nAnswer:',
|
||||
),
|
||||
dict(role='BOT', prompt='{label}'),
|
||||
],
|
||||
),
|
||||
ice_token='</E>',
|
||||
),
|
||||
retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]),
|
||||
inferencer=dict(type=GenInferencer, max_out_len=50),
|
||||
)
|
||||
|
||||
BoolQ_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role='BOT',
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
BoolQ_datasets = [
|
||||
dict(
|
||||
abbr='BoolQ',
|
||||
type=BoolQDatasetV2,
|
||||
path='opencompass/boolq',
|
||||
reader_cfg=BoolQ_reader_cfg,
|
||||
infer_cfg=BoolQ_infer_cfg,
|
||||
eval_cfg=BoolQ_eval_cfg,
|
||||
)
|
||||
]
|
@ -33,7 +33,7 @@ BoolQ_datasets = [
|
||||
dict(
|
||||
abbr='BoolQ',
|
||||
type=BoolQDatasetV2,
|
||||
path='./data/SuperGLUE/BoolQ/val.jsonl',
|
||||
path='opencompass/boolq',
|
||||
reader_cfg=BoolQ_reader_cfg,
|
||||
infer_cfg=BoolQ_infer_cfg,
|
||||
eval_cfg=BoolQ_eval_cfg,
|
||||
|
@ -0,0 +1,43 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import PPLInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import BoolQDatasetV2
|
||||
|
||||
BoolQ_reader_cfg = dict(
|
||||
input_columns=['question', 'passage'],
|
||||
output_column='label',
|
||||
)
|
||||
|
||||
BoolQ_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
type=PromptTemplate,
|
||||
template={
|
||||
'A':
|
||||
dict(round=[
|
||||
dict(role='HUMAN', prompt='{passage}\nQuestion: {question}?'),
|
||||
dict(role='BOT', prompt='Yes'),
|
||||
]),
|
||||
'B':
|
||||
dict(round=[
|
||||
dict(role='HUMAN', prompt='{passage}\nQuestion: {question}?'),
|
||||
dict(role='BOT', prompt='No'),
|
||||
]),
|
||||
},
|
||||
),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=PPLInferencer),
|
||||
)
|
||||
|
||||
BoolQ_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
|
||||
|
||||
BoolQ_datasets = [
|
||||
dict(
|
||||
abbr='BoolQ',
|
||||
type=BoolQDatasetV2,
|
||||
path='opencompass/boolq',
|
||||
reader_cfg=BoolQ_reader_cfg,
|
||||
infer_cfg=BoolQ_infer_cfg,
|
||||
eval_cfg=BoolQ_eval_cfg,
|
||||
)
|
||||
]
|
@ -35,7 +35,7 @@ BoolQ_datasets = [
|
||||
dict(
|
||||
abbr='BoolQ',
|
||||
type=BoolQDatasetV3,
|
||||
path='./data/SuperGLUE/BoolQ/val.jsonl',
|
||||
path='opencompass/boolq',
|
||||
reader_cfg=BoolQ_reader_cfg,
|
||||
infer_cfg=BoolQ_infer_cfg,
|
||||
eval_cfg=BoolQ_eval_cfg,
|
||||
|
@ -36,7 +36,7 @@ BoolQ_datasets = [
|
||||
type=BoolQDataset,
|
||||
abbr='BoolQ',
|
||||
path='json',
|
||||
data_files='./data/SuperGLUE/BoolQ/val.jsonl',
|
||||
data_files='opencompass/boolq',
|
||||
split='train',
|
||||
reader_cfg=BoolQ_reader_cfg,
|
||||
infer_cfg=BoolQ_infer_cfg,
|
||||
|
@ -36,7 +36,7 @@ BoolQ_datasets = [
|
||||
type=BoolQDataset,
|
||||
abbr='BoolQ',
|
||||
path='json',
|
||||
data_files='./data/SuperGLUE/BoolQ/val.jsonl',
|
||||
data_files='opencompass/boolq',
|
||||
split='train',
|
||||
reader_cfg=BoolQ_reader_cfg,
|
||||
infer_cfg=BoolQ_infer_cfg,
|
||||
|
@ -26,7 +26,7 @@ BoolQ_datasets = [
|
||||
type=BoolQDataset,
|
||||
abbr='BoolQ',
|
||||
path='json',
|
||||
data_files='./data/SuperGLUE/BoolQ/val.jsonl',
|
||||
data_files='opencompass/boolq',
|
||||
split='train',
|
||||
reader_cfg=BoolQ_reader_cfg,
|
||||
infer_cfg=BoolQ_infer_cfg,
|
||||
|
68
configs/datasets/race/race_cot_gen_d95929.py
Normal file
68
configs/datasets/race/race_cot_gen_d95929.py
Normal file
@ -0,0 +1,68 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import RaceDataset
|
||||
from opencompass.utils.text_postprocessors import (
|
||||
first_option_postprocess,
|
||||
)
|
||||
|
||||
QUERY_TEMPLATE = """
|
||||
Answer the following multiple choice question. The last line of your response should be of the following format: 'ANSWER: $LETTER' (without quotes) where LETTER is one of ABCD. Think step by step before answering.
|
||||
|
||||
Article: {article}
|
||||
|
||||
Q: {question}
|
||||
|
||||
A. {A}
|
||||
B. {B}
|
||||
C. {C}
|
||||
D. {D}
|
||||
""".strip()
|
||||
|
||||
race_reader_cfg = dict(
|
||||
input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
|
||||
output_column='answer',
|
||||
train_split='validation',
|
||||
test_split='test',
|
||||
)
|
||||
|
||||
race_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=dict(
|
||||
round=[
|
||||
dict(role='HUMAN', prompt=QUERY_TEMPLATE),
|
||||
]
|
||||
),
|
||||
),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer),
|
||||
)
|
||||
|
||||
race_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'),
|
||||
pred_role='BOT',
|
||||
)
|
||||
|
||||
race_datasets = [
|
||||
dict(
|
||||
abbr='race-middle',
|
||||
type=RaceDataset,
|
||||
path='opencompass/race',
|
||||
name='middle',
|
||||
reader_cfg=race_reader_cfg,
|
||||
infer_cfg=race_infer_cfg,
|
||||
eval_cfg=race_eval_cfg,
|
||||
),
|
||||
dict(
|
||||
abbr='race-high',
|
||||
type=RaceDataset,
|
||||
path='opencompass/race',
|
||||
name='high',
|
||||
reader_cfg=race_reader_cfg,
|
||||
infer_cfg=race_infer_cfg,
|
||||
eval_cfg=race_eval_cfg,
|
||||
),
|
||||
]
|
53
configs/datasets/race/race_few_shot_gen_a498ed.py
Normal file
53
configs/datasets/race/race_few_shot_gen_a498ed.py
Normal file
@ -0,0 +1,53 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import RaceDataset
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
race_reader_cfg = dict(
|
||||
input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
|
||||
output_column='answer',
|
||||
train_split='validation',
|
||||
test_split='test'
|
||||
)
|
||||
|
||||
race_infer_cfg = dict(
|
||||
ice_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=dict(
|
||||
begin='</E>',
|
||||
round=[
|
||||
dict(role='HUMAN', prompt='Read the article, and answer the question by replying A, B, C or D.\n\nArticle:\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer:'),
|
||||
dict(role='BOT', prompt='{answer}'),
|
||||
]
|
||||
),
|
||||
ice_token='</E>',
|
||||
),
|
||||
retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4]),
|
||||
inferencer=dict(type=GenInferencer, max_out_len=50),
|
||||
)
|
||||
|
||||
race_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
pred_role='BOT')
|
||||
|
||||
race_datasets = [
|
||||
dict(
|
||||
abbr='race-middle',
|
||||
type=RaceDataset,
|
||||
path='opencompass/race',
|
||||
name='middle',
|
||||
reader_cfg=race_reader_cfg,
|
||||
infer_cfg=race_infer_cfg,
|
||||
eval_cfg=race_eval_cfg),
|
||||
dict(
|
||||
abbr='race-high',
|
||||
type=RaceDataset,
|
||||
path='opencompass/race',
|
||||
name='high',
|
||||
reader_cfg=race_reader_cfg,
|
||||
infer_cfg=race_infer_cfg,
|
||||
eval_cfg=race_eval_cfg)
|
||||
]
|
15
configs/models/chatglm/lmdeploy_glm4_9b_chat.py
Normal file
15
configs/models/chatglm/lmdeploy_glm4_9b_chat.py
Normal file
@ -0,0 +1,15 @@
|
||||
from opencompass.models import TurboMindModelwithChatTemplate
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='glm-4-9b-chat-turbomind',
|
||||
path='THUDM/glm-4-9b-chat',
|
||||
engine_config=dict(max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=8192,
|
||||
max_out_len=1024,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
]
|
15
configs/models/hf_llama/lmdeploy_llama3_1_8b.py
Normal file
15
configs/models/hf_llama/lmdeploy_llama3_1_8b.py
Normal file
@ -0,0 +1,15 @@
|
||||
from opencompass.models import TurboMindModel
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=TurboMindModel,
|
||||
abbr='llama-3.1-8b-turbomind',
|
||||
path='meta-llama/Meta-Llama-3.1-8B',
|
||||
engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
]
|
16
configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py
Normal file
16
configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py
Normal file
@ -0,0 +1,16 @@
|
||||
from opencompass.models import TurboMindModelwithChatTemplate
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='llama-3.1-8b-instruct-turbomind',
|
||||
path='meta-llama/Meta-Llama-3.1-8B-Instruct',
|
||||
engine_config=dict(max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
stop_words=['<|end_of_text|>', '<|eot_id|>'],
|
||||
)
|
||||
]
|
53
opencompass/configs/datasets/ARC_c/ARC_c_cot_gen_926652.py
Normal file
53
opencompass/configs/datasets/ARC_c/ARC_c_cot_gen_926652.py
Normal file
@ -0,0 +1,53 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import ARCDataset
|
||||
from opencompass.utils.text_postprocessors import first_option_postprocess, match_answer_pattern
|
||||
|
||||
QUERY_TEMPLATE = """
|
||||
Answer the following multiple choice question. The last line of your response should be of the following format: 'ANSWER: $LETTER' (without quotes) where LETTER is one of ABCD. Think step by step before answering.
|
||||
|
||||
{question}
|
||||
|
||||
A. {textA}
|
||||
B. {textB}
|
||||
C. {textC}
|
||||
D. {textD}
|
||||
""".strip()
|
||||
|
||||
ARC_c_reader_cfg = dict(
|
||||
input_columns=['question', 'textA', 'textB', 'textC', 'textD'],
|
||||
output_column='answerKey')
|
||||
|
||||
ARC_c_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=dict(
|
||||
round=[
|
||||
dict(
|
||||
role='HUMAN',
|
||||
prompt=QUERY_TEMPLATE)
|
||||
], ),
|
||||
),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer),
|
||||
)
|
||||
|
||||
ARC_c_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role='BOT',
|
||||
pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'),
|
||||
)
|
||||
|
||||
ARC_c_datasets = [
|
||||
dict(
|
||||
abbr='ARC-c',
|
||||
type=ARCDataset,
|
||||
path='opencompass/ai2_arc-dev',
|
||||
name='ARC-Challenge',
|
||||
reader_cfg=ARC_c_reader_cfg,
|
||||
infer_cfg=ARC_c_infer_cfg,
|
||||
eval_cfg=ARC_c_eval_cfg,
|
||||
)
|
||||
]
|
@ -0,0 +1,48 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import ARCDataset
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
ARC_c_reader_cfg = dict(
|
||||
input_columns=['question', 'textA', 'textB', 'textC', 'textD'],
|
||||
output_column='answerKey',
|
||||
)
|
||||
|
||||
ARC_c_infer_cfg = dict(
|
||||
ice_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=dict(
|
||||
begin='</E>',
|
||||
round=[
|
||||
dict(
|
||||
role='HUMAN',
|
||||
prompt='Question: {question}\nA. {textA}\nB. {textB}\nC. {textC}\nD. {textD}\nAnswer:',
|
||||
),
|
||||
dict(role='BOT', prompt='{answerKey}'),
|
||||
],
|
||||
),
|
||||
ice_token='</E>',
|
||||
),
|
||||
retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]),
|
||||
inferencer=dict(type=GenInferencer, max_out_len=50),
|
||||
)
|
||||
|
||||
ARC_c_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role='BOT',
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
ARC_c_datasets = [
|
||||
dict(
|
||||
abbr='ARC-c',
|
||||
type=ARCDataset,
|
||||
path='opencompass/ai2_arc-dev',
|
||||
name='ARC-Challenge',
|
||||
reader_cfg=ARC_c_reader_cfg,
|
||||
infer_cfg=ARC_c_infer_cfg,
|
||||
eval_cfg=ARC_c_eval_cfg,
|
||||
)
|
||||
]
|
@ -0,0 +1,55 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import BoolQDatasetV2
|
||||
from opencompass.utils.text_postprocessors import (
|
||||
first_option_postprocess,
|
||||
)
|
||||
|
||||
QUERY_TEMPLATE = """
|
||||
Answer the following question. The last line of your response should be of the following format: 'ANSWER: $LETTER' (without quotes) where LETTER is one of AB. Think step by step before answering.
|
||||
|
||||
Passage: {passage}
|
||||
|
||||
Question: {question}
|
||||
|
||||
A. Yes
|
||||
B. No
|
||||
|
||||
""".strip()
|
||||
|
||||
BoolQ_reader_cfg = dict(
|
||||
input_columns=['question', 'passage'],
|
||||
output_column='label',
|
||||
)
|
||||
|
||||
BoolQ_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=dict(
|
||||
round=[
|
||||
dict(role='HUMAN', prompt=QUERY_TEMPLATE),
|
||||
]
|
||||
),
|
||||
),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer),
|
||||
)
|
||||
|
||||
BoolQ_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role='BOT',
|
||||
pred_postprocessor=dict(type=first_option_postprocess, options='AB'),
|
||||
)
|
||||
|
||||
BoolQ_datasets = [
|
||||
dict(
|
||||
abbr='BoolQ',
|
||||
type=BoolQDatasetV2,
|
||||
path='opencompass/boolq',
|
||||
reader_cfg=BoolQ_reader_cfg,
|
||||
infer_cfg=BoolQ_infer_cfg,
|
||||
eval_cfg=BoolQ_eval_cfg,
|
||||
)
|
||||
]
|
@ -0,0 +1,47 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import BoolQDatasetV2
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
BoolQ_reader_cfg = dict(
|
||||
input_columns=['question', 'passage'],
|
||||
output_column='label',
|
||||
)
|
||||
|
||||
BoolQ_infer_cfg = dict(
|
||||
ice_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=dict(
|
||||
begin='</E>',
|
||||
round=[
|
||||
dict(
|
||||
role='HUMAN',
|
||||
prompt='{passage}\nQuestion: {question}\nA. Yes\nB. No\nAnswer:',
|
||||
),
|
||||
dict(role='BOT', prompt='{label}'),
|
||||
],
|
||||
),
|
||||
ice_token='</E>',
|
||||
),
|
||||
retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]),
|
||||
inferencer=dict(type=GenInferencer, max_out_len=50),
|
||||
)
|
||||
|
||||
BoolQ_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role='BOT',
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
BoolQ_datasets = [
|
||||
dict(
|
||||
abbr='BoolQ',
|
||||
type=BoolQDatasetV2,
|
||||
path='opencompass/boolq',
|
||||
reader_cfg=BoolQ_reader_cfg,
|
||||
infer_cfg=BoolQ_infer_cfg,
|
||||
eval_cfg=BoolQ_eval_cfg,
|
||||
)
|
||||
]
|
@ -33,7 +33,7 @@ BoolQ_datasets = [
|
||||
dict(
|
||||
abbr='BoolQ',
|
||||
type=BoolQDatasetV2,
|
||||
path='./data/SuperGLUE/BoolQ/val.jsonl',
|
||||
path='opencompass/boolq',
|
||||
reader_cfg=BoolQ_reader_cfg,
|
||||
infer_cfg=BoolQ_infer_cfg,
|
||||
eval_cfg=BoolQ_eval_cfg,
|
||||
|
@ -0,0 +1,43 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import PPLInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import BoolQDatasetV2
|
||||
|
||||
BoolQ_reader_cfg = dict(
|
||||
input_columns=['question', 'passage'],
|
||||
output_column='label',
|
||||
)
|
||||
|
||||
BoolQ_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
type=PromptTemplate,
|
||||
template={
|
||||
'A':
|
||||
dict(round=[
|
||||
dict(role='HUMAN', prompt='{passage}\nQuestion: {question}?'),
|
||||
dict(role='BOT', prompt='Yes'),
|
||||
]),
|
||||
'B':
|
||||
dict(round=[
|
||||
dict(role='HUMAN', prompt='{passage}\nQuestion: {question}?'),
|
||||
dict(role='BOT', prompt='No'),
|
||||
]),
|
||||
},
|
||||
),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=PPLInferencer),
|
||||
)
|
||||
|
||||
BoolQ_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
|
||||
|
||||
BoolQ_datasets = [
|
||||
dict(
|
||||
abbr='BoolQ',
|
||||
type=BoolQDatasetV2,
|
||||
path='opencompass/boolq',
|
||||
reader_cfg=BoolQ_reader_cfg,
|
||||
infer_cfg=BoolQ_infer_cfg,
|
||||
eval_cfg=BoolQ_eval_cfg,
|
||||
)
|
||||
]
|
@ -35,7 +35,7 @@ BoolQ_datasets = [
|
||||
dict(
|
||||
abbr='BoolQ',
|
||||
type=BoolQDatasetV3,
|
||||
path='./data/SuperGLUE/BoolQ/val.jsonl',
|
||||
path='opencompass/boolq',
|
||||
reader_cfg=BoolQ_reader_cfg,
|
||||
infer_cfg=BoolQ_infer_cfg,
|
||||
eval_cfg=BoolQ_eval_cfg,
|
||||
|
@ -36,7 +36,7 @@ BoolQ_datasets = [
|
||||
type=BoolQDataset,
|
||||
abbr='BoolQ',
|
||||
path='json',
|
||||
data_files='./data/SuperGLUE/BoolQ/val.jsonl',
|
||||
data_files='opencompass/boolq',
|
||||
split='train',
|
||||
reader_cfg=BoolQ_reader_cfg,
|
||||
infer_cfg=BoolQ_infer_cfg,
|
||||
|
@ -36,7 +36,7 @@ BoolQ_datasets = [
|
||||
type=BoolQDataset,
|
||||
abbr='BoolQ',
|
||||
path='json',
|
||||
data_files='./data/SuperGLUE/BoolQ/val.jsonl',
|
||||
data_files='opencompass/boolq',
|
||||
split='train',
|
||||
reader_cfg=BoolQ_reader_cfg,
|
||||
infer_cfg=BoolQ_infer_cfg,
|
||||
|
@ -26,7 +26,7 @@ BoolQ_datasets = [
|
||||
type=BoolQDataset,
|
||||
abbr='BoolQ',
|
||||
path='json',
|
||||
data_files='./data/SuperGLUE/BoolQ/val.jsonl',
|
||||
data_files='opencompass/boolq',
|
||||
split='train',
|
||||
reader_cfg=BoolQ_reader_cfg,
|
||||
infer_cfg=BoolQ_infer_cfg,
|
||||
|
68
opencompass/configs/datasets/race/race_cot_gen_d95929.py
Normal file
68
opencompass/configs/datasets/race/race_cot_gen_d95929.py
Normal file
@ -0,0 +1,68 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import RaceDataset
|
||||
from opencompass.utils.text_postprocessors import (
|
||||
first_option_postprocess,
|
||||
)
|
||||
|
||||
QUERY_TEMPLATE = """
|
||||
Answer the following multiple choice question. The last line of your response should be of the following format: 'ANSWER: $LETTER' (without quotes) where LETTER is one of ABCD. Think step by step before answering.
|
||||
|
||||
Article: {article}
|
||||
|
||||
Q: {question}
|
||||
|
||||
A. {A}
|
||||
B. {B}
|
||||
C. {C}
|
||||
D. {D}
|
||||
""".strip()
|
||||
|
||||
race_reader_cfg = dict(
|
||||
input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
|
||||
output_column='answer',
|
||||
train_split='validation',
|
||||
test_split='test',
|
||||
)
|
||||
|
||||
race_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=dict(
|
||||
round=[
|
||||
dict(role='HUMAN', prompt=QUERY_TEMPLATE),
|
||||
]
|
||||
),
|
||||
),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer),
|
||||
)
|
||||
|
||||
race_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'),
|
||||
pred_role='BOT',
|
||||
)
|
||||
|
||||
race_datasets = [
|
||||
dict(
|
||||
abbr='race-middle',
|
||||
type=RaceDataset,
|
||||
path='opencompass/race',
|
||||
name='middle',
|
||||
reader_cfg=race_reader_cfg,
|
||||
infer_cfg=race_infer_cfg,
|
||||
eval_cfg=race_eval_cfg,
|
||||
),
|
||||
dict(
|
||||
abbr='race-high',
|
||||
type=RaceDataset,
|
||||
path='opencompass/race',
|
||||
name='high',
|
||||
reader_cfg=race_reader_cfg,
|
||||
infer_cfg=race_infer_cfg,
|
||||
eval_cfg=race_eval_cfg,
|
||||
),
|
||||
]
|
@ -0,0 +1,53 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import RaceDataset
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
race_reader_cfg = dict(
|
||||
input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
|
||||
output_column='answer',
|
||||
train_split='validation',
|
||||
test_split='test'
|
||||
)
|
||||
|
||||
race_infer_cfg = dict(
|
||||
ice_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=dict(
|
||||
begin='</E>',
|
||||
round=[
|
||||
dict(role='HUMAN', prompt='Read the article, and answer the question by replying A, B, C or D.\n\nArticle:\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer:'),
|
||||
dict(role='BOT', prompt='{answer}'),
|
||||
]
|
||||
),
|
||||
ice_token='</E>',
|
||||
),
|
||||
retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4]),
|
||||
inferencer=dict(type=GenInferencer, max_out_len=50),
|
||||
)
|
||||
|
||||
race_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
pred_role='BOT')
|
||||
|
||||
race_datasets = [
|
||||
dict(
|
||||
abbr='race-middle',
|
||||
type=RaceDataset,
|
||||
path='opencompass/race',
|
||||
name='middle',
|
||||
reader_cfg=race_reader_cfg,
|
||||
infer_cfg=race_infer_cfg,
|
||||
eval_cfg=race_eval_cfg),
|
||||
dict(
|
||||
abbr='race-high',
|
||||
type=RaceDataset,
|
||||
path='opencompass/race',
|
||||
name='high',
|
||||
reader_cfg=race_reader_cfg,
|
||||
infer_cfg=race_infer_cfg,
|
||||
eval_cfg=race_eval_cfg)
|
||||
]
|
15
opencompass/configs/models/chatglm/lmdeploy_glm4_9b_chat.py
Normal file
15
opencompass/configs/models/chatglm/lmdeploy_glm4_9b_chat.py
Normal file
@ -0,0 +1,15 @@
|
||||
from opencompass.models import TurboMindModelwithChatTemplate
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='glm-4-9b-chat-turbomind',
|
||||
path='THUDM/glm-4-9b-chat',
|
||||
engine_config=dict(max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=8192,
|
||||
max_out_len=1024,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
]
|
15
opencompass/configs/models/hf_llama/lmdeploy_llama3_1_8b.py
Normal file
15
opencompass/configs/models/hf_llama/lmdeploy_llama3_1_8b.py
Normal file
@ -0,0 +1,15 @@
|
||||
from opencompass.models import TurboMindModel
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=TurboMindModel,
|
||||
abbr='llama-3.1-8b-turbomind',
|
||||
path='meta-llama/Meta-Llama-3.1-8B',
|
||||
engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
]
|
@ -0,0 +1,16 @@
|
||||
from opencompass.models import TurboMindModelwithChatTemplate
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='llama-3.1-8b-instruct-turbomind',
|
||||
path='meta-llama/Meta-Llama-3.1-8B-Instruct',
|
||||
engine_config=dict(max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
stop_words=['<|end_of_text|>', '<|eot_id|>'],
|
||||
)
|
||||
]
|
@ -31,7 +31,7 @@ class BoolQDatasetV2(BaseDataset):
|
||||
|
||||
@staticmethod
|
||||
def load(path):
|
||||
path = get_data_path(path, local_mode=True)
|
||||
path = get_data_path(path)
|
||||
dataset = []
|
||||
with open(path, 'r') as f:
|
||||
for line in f:
|
||||
|
@ -4,7 +4,7 @@ from .icl_dpp_retriever import DPPRetriever # noqa
|
||||
from .icl_fix_k_retriever import FixKRetriever # noqa
|
||||
from .icl_mdl_retriever import MDLRetriever # noqa
|
||||
from .icl_random_retriever import RandomRetriever # noqa
|
||||
from .icl_sliding_k_retriever import SlidingWindowRetriever # noqa
|
||||
from .icl_topk_retriever import TopkRetriever # noqa
|
||||
from .icl_votek_retriever import VotekRetriever # noqa
|
||||
from .icl_zero_retriever import ZeroRetriever # noqa
|
||||
from .icl_sliding_k_retriever import SlidingWindowRetriever # noqa
|
||||
|
@ -51,8 +51,8 @@ class SlidingWindowRetriever(BaseRetriever):
|
||||
for current_index in trange(len(self.test_ds),
|
||||
disable=not self.is_main_process):
|
||||
if current_index < self.k:
|
||||
"""For the first few examples,
|
||||
get the previous ones and pad with the last ones"""
|
||||
"""For the first few examples, get the previous ones and pad
|
||||
with the last ones."""
|
||||
start_index = max(0, current_index - self.k)
|
||||
previous_shots = list(range(start_index, current_index))
|
||||
if len(previous_shots) < self.k:
|
||||
|
@ -203,7 +203,7 @@ DATASETS_MAPPING = {
|
||||
"opencompass/race": {
|
||||
"ms_id": "opencompass/race",
|
||||
"hf_id": "opencompass/race",
|
||||
"local": "./data/race",
|
||||
"local": "./data/race/",
|
||||
},
|
||||
# SIQA
|
||||
"opencompass/siqa": {
|
||||
@ -229,6 +229,12 @@ DATASETS_MAPPING = {
|
||||
"hf_id": "opencompass/summedits",
|
||||
"local": "./data/summedits/summedits.jsonl",
|
||||
},
|
||||
# SuperGLUE
|
||||
"opencompass/boolq": {
|
||||
"ms_id": "opencompass/boolq",
|
||||
"hf_id": "opencompass/boolq",
|
||||
"local": "./data/SuperGLUE/BoolQ/val.jsonl",
|
||||
},
|
||||
# TriviaQA
|
||||
"opencompass/trivia_qa": {
|
||||
"ms_id": "opencompass/trivia_qa",
|
||||
@ -292,10 +298,6 @@ DATASETS_URL = {
|
||||
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/BBH.zip",
|
||||
"md5": "60c49f9bef5148aa7e1941328e96a554",
|
||||
},
|
||||
"/mmlu/": {
|
||||
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/mmlu.zip",
|
||||
"md5": "761310671509a239e41c4b717f7fab9c",
|
||||
},
|
||||
"/compass_arena/": {
|
||||
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/compass_arena.zip",
|
||||
"md5": "cd59b54a179d16f2a858b359b60588f6",
|
||||
@ -367,5 +369,17 @@ DATASETS_URL = {
|
||||
"FewCLUE": {
|
||||
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/FewCLUE.zip",
|
||||
"md5": "7976e2bb0e9d885ffd3c55f7c5d4021e",
|
||||
}
|
||||
},
|
||||
"/race": {
|
||||
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/race.zip",
|
||||
"md5": "b758251764a264746cf45749c02363f9",
|
||||
},
|
||||
"/ARC": {
|
||||
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/ARC.zip",
|
||||
"md5": "d720629b69f1a51cfe78bf65b00b44f6",
|
||||
},
|
||||
"/SuperGLUE": {
|
||||
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/SuperGLUE.zip",
|
||||
"md5": "b60904915b0b61d1a04ea52280169936",
|
||||
},
|
||||
}
|
||||
|
@ -98,6 +98,7 @@ def first_option_postprocess(text: str, options: str, cushion=True) -> str:
|
||||
f'答案是\s?(\S+)(?:。|$)',
|
||||
f'答案应该是\s?(\S+)(?:。|$)',
|
||||
f'答案为\s?(\S+)(?:。|$)',
|
||||
f'(?i)ANSWER\s*:\s*([{options}])',
|
||||
f'[Tt]he answer is:?\s+\(?([{options}])\)?',
|
||||
f'[Tt]he answer is option:?\s+\(?([{options}])\)?',
|
||||
f'[Tt]he correct answer is:?\s+\(?([{options}])\)?',
|
||||
|
Loading…
Reference in New Issue
Block a user