mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Feature] Fullbench v0.1 language update (#1463)
* update * update * update * update
This commit is contained in:
parent
463231c651
commit
245664f4c0
53
configs/datasets/ARC_c/ARC_c_cot_gen_926652.py
Normal file
53
configs/datasets/ARC_c/ARC_c_cot_gen_926652.py
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
|
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||||
|
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||||
|
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||||
|
from opencompass.datasets import ARCDataset
|
||||||
|
from opencompass.utils.text_postprocessors import first_option_postprocess, match_answer_pattern
|
||||||
|
|
||||||
|
# Zero-shot chain-of-thought prompt for ARC-Challenge. The model is asked to
# reason step by step and to end its reply with a line 'ANSWER: $LETTER'.
QUERY_TEMPLATE = '\n'.join((
    # One logical line; split across literals only for readability.
    'Answer the following multiple choice question. '
    'The last line of your response should be of the following format: '
    "'ANSWER: $LETTER' (without quotes) where LETTER is one of ABCD. "
    'Think step by step before answering.',
    '',
    '{question}',
    '',
    'A. {textA}',
    'B. {textB}',
    'C. {textC}',
    'D. {textD}',
))

# Columns substituted into the prompt, and the column holding the gold answer.
ARC_c_reader_cfg = {
    'input_columns': ['question', 'textA', 'textB', 'textC', 'textD'],
    'output_column': 'answerKey',
}

# A single HUMAN turn built from QUERY_TEMPLATE, paired with ZeroRetriever
# and GenInferencer.
ARC_c_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {'role': 'HUMAN', 'prompt': QUERY_TEMPLATE},
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer},
}

# Accuracy on the BOT reply, post-processed by first_option_postprocess
# with the option set 'ABCD'.
ARC_c_eval_cfg = {
    'evaluator': {'type': AccEvaluator},
    'pred_role': 'BOT',
    'pred_postprocessor': {
        'type': first_option_postprocess,
        'options': 'ABCD',
    },
}

# The single dataset entry exported by this config.
ARC_c_datasets = [
    {
        'abbr': 'ARC-c',
        'type': ARCDataset,
        'path': 'opencompass/ai2_arc-dev',
        'name': 'ARC-Challenge',
        'reader_cfg': ARC_c_reader_cfg,
        'infer_cfg': ARC_c_infer_cfg,
        'eval_cfg': ARC_c_eval_cfg,
    },
]
|
48
configs/datasets/ARC_c/ARC_c_few_shot_gen_e9b043.py
Normal file
48
configs/datasets/ARC_c/ARC_c_few_shot_gen_e9b043.py
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
|
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
|
||||||
|
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||||
|
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||||
|
from opencompass.datasets import ARCDataset
|
||||||
|
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||||
|
|
||||||
|
# Columns substituted into the prompt, and the column holding the gold answer.
ARC_c_reader_cfg = {
    'input_columns': ['question', 'textA', 'textB', 'textC', 'textD'],
    'output_column': 'answerKey',
}

# Few-shot generation setup. '</E>' is used both as the `begin` element of
# the template and as the declared `ice_token`; the retriever is pinned to
# five fixed example ids.
ARC_c_infer_cfg = {
    'ice_template': {
        'type': PromptTemplate,
        'template': {
            'begin': '</E>',
            'round': [
                {
                    'role': 'HUMAN',
                    'prompt': 'Question: {question}\nA. {textA}\nB. {textB}\nC. {textC}\nD. {textD}\nAnswer:',
                },
                {'role': 'BOT', 'prompt': '{answerKey}'},
            ],
        },
        'ice_token': '</E>',
    },
    'retriever': {'type': FixKRetriever, 'fix_id_list': [0, 2, 4, 6, 8]},
    'inferencer': {'type': GenInferencer, 'max_out_len': 50},
}

# Accuracy on the BOT reply after first_capital_postprocess.
ARC_c_eval_cfg = {
    'evaluator': {'type': AccEvaluator},
    'pred_role': 'BOT',
    'pred_postprocessor': {'type': first_capital_postprocess},
}

# The single dataset entry exported by this config.
ARC_c_datasets = [
    {
        'abbr': 'ARC-c',
        'type': ARCDataset,
        'path': 'opencompass/ai2_arc-dev',
        'name': 'ARC-Challenge',
        'reader_cfg': ARC_c_reader_cfg,
        'infer_cfg': ARC_c_infer_cfg,
        'eval_cfg': ARC_c_eval_cfg,
    },
]
|
@ -0,0 +1,55 @@
|
|||||||
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
|
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||||
|
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||||
|
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||||
|
from opencompass.datasets import BoolQDatasetV2
|
||||||
|
from opencompass.utils.text_postprocessors import (
|
||||||
|
first_option_postprocess,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Zero-shot chain-of-thought prompt for BoolQ, framed as a two-option (A/B)
# question. The model is asked to end its reply with 'ANSWER: $LETTER'.
# NOTE(review): option B is spelled 'NO' while option A is 'Yes'; the text is
# kept byte-for-byte here -- confirm whether the casing is intentional.
QUERY_TEMPLATE = '\n'.join((
    # One logical line; split across literals only for readability.
    'Answer the following question. '
    'The last line of your response should be of the following format: '
    "'ANSWER: $LETTER' (without quotes) where LETTER is one of AB. "
    'Think step by step before answering.',
    '',
    'Passage: {passage}',
    '',
    'Question: {question}',
    '',
    'A. Yes',
    'B. NO',
))

# Columns substituted into the prompt, and the column holding the gold answer.
BoolQ_reader_cfg = {
    'input_columns': ['question', 'passage'],
    'output_column': 'label',
}

# A single HUMAN turn built from QUERY_TEMPLATE, paired with ZeroRetriever
# and GenInferencer.
BoolQ_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {'role': 'HUMAN', 'prompt': QUERY_TEMPLATE},
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer},
}

# Accuracy on the BOT reply, post-processed by first_option_postprocess
# with the option set 'AB'.
BoolQ_eval_cfg = {
    'evaluator': {'type': AccEvaluator},
    'pred_role': 'BOT',
    'pred_postprocessor': {
        'type': first_option_postprocess,
        'options': 'AB',
    },
}

# The single dataset entry exported by this config.
BoolQ_datasets = [
    {
        'abbr': 'BoolQ',
        'type': BoolQDatasetV2,
        'path': 'opencompass/boolq',
        'reader_cfg': BoolQ_reader_cfg,
        'infer_cfg': BoolQ_infer_cfg,
        'eval_cfg': BoolQ_eval_cfg,
    },
]
|
@ -0,0 +1,47 @@
|
|||||||
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
|
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
|
||||||
|
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||||
|
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||||
|
from opencompass.datasets import BoolQDatasetV2
|
||||||
|
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||||
|
|
||||||
|
# Columns substituted into the prompt, and the column holding the gold answer.
BoolQ_reader_cfg = {
    'input_columns': ['question', 'passage'],
    'output_column': 'label',
}

# Few-shot generation setup. '</E>' is used both as the `begin` element of
# the template and as the declared `ice_token`; the retriever is pinned to
# five fixed example ids.
BoolQ_infer_cfg = {
    'ice_template': {
        'type': PromptTemplate,
        'template': {
            'begin': '</E>',
            'round': [
                {
                    'role': 'HUMAN',
                    'prompt': '{passage}\nQuestion: {question}\nA. Yes\nB. No\nAnswer:',
                },
                {'role': 'BOT', 'prompt': '{label}'},
            ],
        },
        'ice_token': '</E>',
    },
    'retriever': {'type': FixKRetriever, 'fix_id_list': [0, 2, 4, 6, 8]},
    'inferencer': {'type': GenInferencer, 'max_out_len': 50},
}

# Accuracy on the BOT reply after first_capital_postprocess.
BoolQ_eval_cfg = {
    'evaluator': {'type': AccEvaluator},
    'pred_role': 'BOT',
    'pred_postprocessor': {'type': first_capital_postprocess},
}

# The single dataset entry exported by this config.
BoolQ_datasets = [
    {
        'abbr': 'BoolQ',
        'type': BoolQDatasetV2,
        'path': 'opencompass/boolq',
        'reader_cfg': BoolQ_reader_cfg,
        'infer_cfg': BoolQ_infer_cfg,
        'eval_cfg': BoolQ_eval_cfg,
    },
]
|
@ -33,7 +33,7 @@ BoolQ_datasets = [
|
|||||||
dict(
|
dict(
|
||||||
abbr='BoolQ',
|
abbr='BoolQ',
|
||||||
type=BoolQDatasetV2,
|
type=BoolQDatasetV2,
|
||||||
path='./data/SuperGLUE/BoolQ/val.jsonl',
|
path='opencompass/boolq',
|
||||||
reader_cfg=BoolQ_reader_cfg,
|
reader_cfg=BoolQ_reader_cfg,
|
||||||
infer_cfg=BoolQ_infer_cfg,
|
infer_cfg=BoolQ_infer_cfg,
|
||||||
eval_cfg=BoolQ_eval_cfg,
|
eval_cfg=BoolQ_eval_cfg,
|
||||||
|
@ -0,0 +1,43 @@
|
|||||||
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
|
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||||
|
from opencompass.openicl.icl_inferencer import PPLInferencer
|
||||||
|
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||||
|
from opencompass.datasets import BoolQDatasetV2
|
||||||
|
|
||||||
|
# Columns substituted into the prompt, and the column holding the gold answer.
BoolQ_reader_cfg = {
    'input_columns': ['question', 'passage'],
    'output_column': 'label',
}

# PPLInferencer setup: one template per candidate label ('A' and 'B'). Both
# share the same HUMAN turn and differ only in the BOT continuation
# ('Yes' vs 'No').
BoolQ_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'A': {
                'round': [
                    {'role': 'HUMAN', 'prompt': '{passage}\nQuestion: {question}?'},
                    {'role': 'BOT', 'prompt': 'Yes'},
                ],
            },
            'B': {
                'round': [
                    {'role': 'HUMAN', 'prompt': '{passage}\nQuestion: {question}?'},
                    {'role': 'BOT', 'prompt': 'No'},
                ],
            },
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': PPLInferencer},
}

# Plain accuracy; no prediction post-processing is configured here.
BoolQ_eval_cfg = {'evaluator': {'type': AccEvaluator}}

# The single dataset entry exported by this config.
BoolQ_datasets = [
    {
        'abbr': 'BoolQ',
        'type': BoolQDatasetV2,
        'path': 'opencompass/boolq',
        'reader_cfg': BoolQ_reader_cfg,
        'infer_cfg': BoolQ_infer_cfg,
        'eval_cfg': BoolQ_eval_cfg,
    },
]
|
@ -35,7 +35,7 @@ BoolQ_datasets = [
|
|||||||
dict(
|
dict(
|
||||||
abbr='BoolQ',
|
abbr='BoolQ',
|
||||||
type=BoolQDatasetV3,
|
type=BoolQDatasetV3,
|
||||||
path='./data/SuperGLUE/BoolQ/val.jsonl',
|
path='opencompass/boolq',
|
||||||
reader_cfg=BoolQ_reader_cfg,
|
reader_cfg=BoolQ_reader_cfg,
|
||||||
infer_cfg=BoolQ_infer_cfg,
|
infer_cfg=BoolQ_infer_cfg,
|
||||||
eval_cfg=BoolQ_eval_cfg,
|
eval_cfg=BoolQ_eval_cfg,
|
||||||
|
@ -36,7 +36,7 @@ BoolQ_datasets = [
|
|||||||
type=BoolQDataset,
|
type=BoolQDataset,
|
||||||
abbr='BoolQ',
|
abbr='BoolQ',
|
||||||
path='json',
|
path='json',
|
||||||
data_files='./data/SuperGLUE/BoolQ/val.jsonl',
|
data_files='opencompass/boolq',
|
||||||
split='train',
|
split='train',
|
||||||
reader_cfg=BoolQ_reader_cfg,
|
reader_cfg=BoolQ_reader_cfg,
|
||||||
infer_cfg=BoolQ_infer_cfg,
|
infer_cfg=BoolQ_infer_cfg,
|
||||||
|
@ -36,7 +36,7 @@ BoolQ_datasets = [
|
|||||||
type=BoolQDataset,
|
type=BoolQDataset,
|
||||||
abbr='BoolQ',
|
abbr='BoolQ',
|
||||||
path='json',
|
path='json',
|
||||||
data_files='./data/SuperGLUE/BoolQ/val.jsonl',
|
data_files='opencompass/boolq',
|
||||||
split='train',
|
split='train',
|
||||||
reader_cfg=BoolQ_reader_cfg,
|
reader_cfg=BoolQ_reader_cfg,
|
||||||
infer_cfg=BoolQ_infer_cfg,
|
infer_cfg=BoolQ_infer_cfg,
|
||||||
|
@ -26,7 +26,7 @@ BoolQ_datasets = [
|
|||||||
type=BoolQDataset,
|
type=BoolQDataset,
|
||||||
abbr='BoolQ',
|
abbr='BoolQ',
|
||||||
path='json',
|
path='json',
|
||||||
data_files='./data/SuperGLUE/BoolQ/val.jsonl',
|
data_files='opencompass/boolq',
|
||||||
split='train',
|
split='train',
|
||||||
reader_cfg=BoolQ_reader_cfg,
|
reader_cfg=BoolQ_reader_cfg,
|
||||||
infer_cfg=BoolQ_infer_cfg,
|
infer_cfg=BoolQ_infer_cfg,
|
||||||
|
68
configs/datasets/race/race_cot_gen_d95929.py
Normal file
68
configs/datasets/race/race_cot_gen_d95929.py
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
|
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||||
|
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||||
|
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||||
|
from opencompass.datasets import RaceDataset
|
||||||
|
from opencompass.utils.text_postprocessors import (
|
||||||
|
first_option_postprocess,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Zero-shot chain-of-thought prompt for RACE. The model is asked to reason
# step by step and to end its reply with a line 'ANSWER: $LETTER'.
QUERY_TEMPLATE = '\n'.join((
    # One logical line; split across literals only for readability.
    'Answer the following multiple choice question. '
    'The last line of your response should be of the following format: '
    "'ANSWER: $LETTER' (without quotes) where LETTER is one of ABCD. "
    'Think step by step before answering.',
    '',
    'Article: {article}',
    '',
    'Q: {question}',
    '',
    'A. {A}',
    'B. {B}',
    'C. {C}',
    'D. {D}',
))

# Prompt columns, gold-answer column, and the split names used as the
# train and test splits.
race_reader_cfg = {
    'input_columns': ['article', 'question', 'A', 'B', 'C', 'D'],
    'output_column': 'answer',
    'train_split': 'validation',
    'test_split': 'test',
}

# A single HUMAN turn built from QUERY_TEMPLATE, paired with ZeroRetriever
# and GenInferencer.
race_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {'role': 'HUMAN', 'prompt': QUERY_TEMPLATE},
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer},
}

# Accuracy on the BOT reply, post-processed by first_option_postprocess
# with the option set 'ABCD'.
race_eval_cfg = {
    'evaluator': {'type': AccEvaluator},
    'pred_postprocessor': {
        'type': first_option_postprocess,
        'options': 'ABCD',
    },
    'pred_role': 'BOT',
}

# Two entries sharing the same reader/infer/eval configs; they differ only in
# `abbr` and the subset `name` ('middle' vs 'high').
race_datasets = [
    {
        'abbr': 'race-middle',
        'type': RaceDataset,
        'path': 'opencompass/race',
        'name': 'middle',
        'reader_cfg': race_reader_cfg,
        'infer_cfg': race_infer_cfg,
        'eval_cfg': race_eval_cfg,
    },
    {
        'abbr': 'race-high',
        'type': RaceDataset,
        'path': 'opencompass/race',
        'name': 'high',
        'reader_cfg': race_reader_cfg,
        'infer_cfg': race_infer_cfg,
        'eval_cfg': race_eval_cfg,
    },
]
|
53
configs/datasets/race/race_few_shot_gen_a498ed.py
Normal file
53
configs/datasets/race/race_few_shot_gen_a498ed.py
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
|
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
|
||||||
|
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||||
|
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||||
|
from opencompass.datasets import RaceDataset
|
||||||
|
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||||
|
|
||||||
|
# Prompt columns, gold-answer column, and the split names used as the
# train and test splits.
race_reader_cfg = {
    'input_columns': ['article', 'question', 'A', 'B', 'C', 'D'],
    'output_column': 'answer',
    'train_split': 'validation',
    'test_split': 'test',
}

# Few-shot generation setup. '</E>' is used both as the `begin` element of
# the template and as the declared `ice_token`; the retriever is pinned to
# three fixed example ids.
race_infer_cfg = {
    'ice_template': {
        'type': PromptTemplate,
        'template': {
            'begin': '</E>',
            'round': [
                {
                    'role': 'HUMAN',
                    'prompt': 'Read the article, and answer the question by replying A, B, C or D.\n\nArticle:\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer:',
                },
                {'role': 'BOT', 'prompt': '{answer}'},
            ],
        },
        'ice_token': '</E>',
    },
    'retriever': {'type': FixKRetriever, 'fix_id_list': [0, 2, 4]},
    'inferencer': {'type': GenInferencer, 'max_out_len': 50},
}

# Accuracy on the BOT reply after first_capital_postprocess.
race_eval_cfg = {
    'evaluator': {'type': AccEvaluator},
    'pred_postprocessor': {'type': first_capital_postprocess},
    'pred_role': 'BOT',
}

# Two entries sharing the same reader/infer/eval configs; they differ only in
# `abbr` and the subset `name` ('middle' vs 'high').
race_datasets = [
    {
        'abbr': 'race-middle',
        'type': RaceDataset,
        'path': 'opencompass/race',
        'name': 'middle',
        'reader_cfg': race_reader_cfg,
        'infer_cfg': race_infer_cfg,
        'eval_cfg': race_eval_cfg,
    },
    {
        'abbr': 'race-high',
        'type': RaceDataset,
        'path': 'opencompass/race',
        'name': 'high',
        'reader_cfg': race_reader_cfg,
        'infer_cfg': race_infer_cfg,
        'eval_cfg': race_eval_cfg,
    },
]
|
15
configs/models/chatglm/lmdeploy_glm4_9b_chat.py
Normal file
15
configs/models/chatglm/lmdeploy_glm4_9b_chat.py
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
from opencompass.models import TurboMindModelwithChatTemplate
|
||||||
|
|
||||||
|
# Model entry for THUDM/glm-4-9b-chat, wrapped by
# TurboMindModelwithChatTemplate.
models = [
    {
        'type': TurboMindModelwithChatTemplate,
        'abbr': 'glm-4-9b-chat-turbomind',
        'path': 'THUDM/glm-4-9b-chat',
        'engine_config': {'max_batch_size': 16, 'tp': 1},
        # top_k=1 with a near-zero temperature -- presumably intended as
        # greedy decoding; confirm against the backend's semantics.
        'gen_config': {
            'top_k': 1,
            'temperature': 1e-6,
            'top_p': 0.9,
            'max_new_tokens': 1024,
        },
        'max_seq_len': 8192,
        'max_out_len': 1024,
        'batch_size': 16,
        'run_cfg': {'num_gpus': 1},
    },
]
|
15
configs/models/hf_llama/lmdeploy_llama3_1_8b.py
Normal file
15
configs/models/hf_llama/lmdeploy_llama3_1_8b.py
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
from opencompass.models import TurboMindModel
|
||||||
|
|
||||||
|
# Model entry for meta-llama/Meta-Llama-3.1-8B, wrapped by TurboMindModel.
models = [
    {
        'type': TurboMindModel,
        'abbr': 'llama-3.1-8b-turbomind',
        'path': 'meta-llama/Meta-Llama-3.1-8B',
        # session_len is set to the same value as max_seq_len below.
        'engine_config': {'session_len': 7168, 'max_batch_size': 16, 'tp': 1},
        # top_k=1 with a near-zero temperature -- presumably intended as
        # greedy decoding; confirm against the backend's semantics.
        'gen_config': {
            'top_k': 1,
            'temperature': 1e-6,
            'top_p': 0.9,
            'max_new_tokens': 1024,
        },
        'max_seq_len': 7168,
        'max_out_len': 1024,
        'batch_size': 16,
        'run_cfg': {'num_gpus': 1},
    },
]
|
16
configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py
Normal file
16
configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
from opencompass.models import TurboMindModelwithChatTemplate
|
||||||
|
|
||||||
|
# Model entry for meta-llama/Meta-Llama-3.1-8B-Instruct, wrapped by
# TurboMindModelwithChatTemplate.
models = [
    {
        'type': TurboMindModelwithChatTemplate,
        'abbr': 'llama-3.1-8b-instruct-turbomind',
        'path': 'meta-llama/Meta-Llama-3.1-8B-Instruct',
        'engine_config': {'max_batch_size': 16, 'tp': 1},
        # top_k=1 with a near-zero temperature -- presumably intended as
        # greedy decoding; confirm against the backend's semantics.
        'gen_config': {
            'top_k': 1,
            'temperature': 1e-6,
            'top_p': 0.9,
            'max_new_tokens': 1024,
        },
        'max_seq_len': 7168,
        'max_out_len': 1024,
        'batch_size': 16,
        'run_cfg': {'num_gpus': 1},
        # Explicit stop strings passed alongside the generation settings.
        'stop_words': ['<|end_of_text|>', '<|eot_id|>'],
    },
]
|
53
opencompass/configs/datasets/ARC_c/ARC_c_cot_gen_926652.py
Normal file
53
opencompass/configs/datasets/ARC_c/ARC_c_cot_gen_926652.py
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
|
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||||
|
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||||
|
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||||
|
from opencompass.datasets import ARCDataset
|
||||||
|
from opencompass.utils.text_postprocessors import first_option_postprocess, match_answer_pattern
|
||||||
|
|
||||||
|
# Zero-shot chain-of-thought prompt for ARC-Challenge. The model is asked to
# reason step by step and to end its reply with a line 'ANSWER: $LETTER'.
QUERY_TEMPLATE = '\n'.join((
    # One logical line; split across literals only for readability.
    'Answer the following multiple choice question. '
    'The last line of your response should be of the following format: '
    "'ANSWER: $LETTER' (without quotes) where LETTER is one of ABCD. "
    'Think step by step before answering.',
    '',
    '{question}',
    '',
    'A. {textA}',
    'B. {textB}',
    'C. {textC}',
    'D. {textD}',
))

# Columns substituted into the prompt, and the column holding the gold answer.
ARC_c_reader_cfg = {
    'input_columns': ['question', 'textA', 'textB', 'textC', 'textD'],
    'output_column': 'answerKey',
}

# A single HUMAN turn built from QUERY_TEMPLATE, paired with ZeroRetriever
# and GenInferencer.
ARC_c_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {'role': 'HUMAN', 'prompt': QUERY_TEMPLATE},
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer},
}

# Accuracy on the BOT reply, post-processed by first_option_postprocess
# with the option set 'ABCD'.
ARC_c_eval_cfg = {
    'evaluator': {'type': AccEvaluator},
    'pred_role': 'BOT',
    'pred_postprocessor': {
        'type': first_option_postprocess,
        'options': 'ABCD',
    },
}

# The single dataset entry exported by this config.
ARC_c_datasets = [
    {
        'abbr': 'ARC-c',
        'type': ARCDataset,
        'path': 'opencompass/ai2_arc-dev',
        'name': 'ARC-Challenge',
        'reader_cfg': ARC_c_reader_cfg,
        'infer_cfg': ARC_c_infer_cfg,
        'eval_cfg': ARC_c_eval_cfg,
    },
]
|
@ -0,0 +1,48 @@
|
|||||||
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
|
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
|
||||||
|
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||||
|
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||||
|
from opencompass.datasets import ARCDataset
|
||||||
|
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||||
|
|
||||||
|
# Columns substituted into the prompt, and the column holding the gold answer.
ARC_c_reader_cfg = {
    'input_columns': ['question', 'textA', 'textB', 'textC', 'textD'],
    'output_column': 'answerKey',
}

# Few-shot generation setup. '</E>' is used both as the `begin` element of
# the template and as the declared `ice_token`; the retriever is pinned to
# five fixed example ids.
ARC_c_infer_cfg = {
    'ice_template': {
        'type': PromptTemplate,
        'template': {
            'begin': '</E>',
            'round': [
                {
                    'role': 'HUMAN',
                    'prompt': 'Question: {question}\nA. {textA}\nB. {textB}\nC. {textC}\nD. {textD}\nAnswer:',
                },
                {'role': 'BOT', 'prompt': '{answerKey}'},
            ],
        },
        'ice_token': '</E>',
    },
    'retriever': {'type': FixKRetriever, 'fix_id_list': [0, 2, 4, 6, 8]},
    'inferencer': {'type': GenInferencer, 'max_out_len': 50},
}

# Accuracy on the BOT reply after first_capital_postprocess.
ARC_c_eval_cfg = {
    'evaluator': {'type': AccEvaluator},
    'pred_role': 'BOT',
    'pred_postprocessor': {'type': first_capital_postprocess},
}

# The single dataset entry exported by this config.
ARC_c_datasets = [
    {
        'abbr': 'ARC-c',
        'type': ARCDataset,
        'path': 'opencompass/ai2_arc-dev',
        'name': 'ARC-Challenge',
        'reader_cfg': ARC_c_reader_cfg,
        'infer_cfg': ARC_c_infer_cfg,
        'eval_cfg': ARC_c_eval_cfg,
    },
]
|
@ -0,0 +1,55 @@
|
|||||||
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
|
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||||
|
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||||
|
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||||
|
from opencompass.datasets import BoolQDatasetV2
|
||||||
|
from opencompass.utils.text_postprocessors import (
|
||||||
|
first_option_postprocess,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Zero-shot chain-of-thought prompt for BoolQ, framed as a two-option (A/B)
# question. The model is asked to end its reply with 'ANSWER: $LETTER'.
# NOTE(review): option B is spelled 'NO' while option A is 'Yes'; the text is
# kept byte-for-byte here -- confirm whether the casing is intentional.
QUERY_TEMPLATE = '\n'.join((
    # One logical line; split across literals only for readability.
    'Answer the following question. '
    'The last line of your response should be of the following format: '
    "'ANSWER: $LETTER' (without quotes) where LETTER is one of AB. "
    'Think step by step before answering.',
    '',
    'Passage: {passage}',
    '',
    'Question: {question}',
    '',
    'A. Yes',
    'B. NO',
))

# Columns substituted into the prompt, and the column holding the gold answer.
BoolQ_reader_cfg = {
    'input_columns': ['question', 'passage'],
    'output_column': 'label',
}

# A single HUMAN turn built from QUERY_TEMPLATE, paired with ZeroRetriever
# and GenInferencer.
BoolQ_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {'role': 'HUMAN', 'prompt': QUERY_TEMPLATE},
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer},
}

# Accuracy on the BOT reply, post-processed by first_option_postprocess
# with the option set 'AB'.
BoolQ_eval_cfg = {
    'evaluator': {'type': AccEvaluator},
    'pred_role': 'BOT',
    'pred_postprocessor': {
        'type': first_option_postprocess,
        'options': 'AB',
    },
}

# The single dataset entry exported by this config.
BoolQ_datasets = [
    {
        'abbr': 'BoolQ',
        'type': BoolQDatasetV2,
        'path': 'opencompass/boolq',
        'reader_cfg': BoolQ_reader_cfg,
        'infer_cfg': BoolQ_infer_cfg,
        'eval_cfg': BoolQ_eval_cfg,
    },
]
|
@ -0,0 +1,47 @@
|
|||||||
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
|
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
|
||||||
|
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||||
|
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||||
|
from opencompass.datasets import BoolQDatasetV2
|
||||||
|
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||||
|
|
||||||
|
# Columns substituted into the prompt, and the column holding the gold answer.
BoolQ_reader_cfg = {
    'input_columns': ['question', 'passage'],
    'output_column': 'label',
}

# Few-shot generation setup. '</E>' is used both as the `begin` element of
# the template and as the declared `ice_token`; the retriever is pinned to
# five fixed example ids.
BoolQ_infer_cfg = {
    'ice_template': {
        'type': PromptTemplate,
        'template': {
            'begin': '</E>',
            'round': [
                {
                    'role': 'HUMAN',
                    'prompt': '{passage}\nQuestion: {question}\nA. Yes\nB. No\nAnswer:',
                },
                {'role': 'BOT', 'prompt': '{label}'},
            ],
        },
        'ice_token': '</E>',
    },
    'retriever': {'type': FixKRetriever, 'fix_id_list': [0, 2, 4, 6, 8]},
    'inferencer': {'type': GenInferencer, 'max_out_len': 50},
}

# Accuracy on the BOT reply after first_capital_postprocess.
BoolQ_eval_cfg = {
    'evaluator': {'type': AccEvaluator},
    'pred_role': 'BOT',
    'pred_postprocessor': {'type': first_capital_postprocess},
}

# The single dataset entry exported by this config.
BoolQ_datasets = [
    {
        'abbr': 'BoolQ',
        'type': BoolQDatasetV2,
        'path': 'opencompass/boolq',
        'reader_cfg': BoolQ_reader_cfg,
        'infer_cfg': BoolQ_infer_cfg,
        'eval_cfg': BoolQ_eval_cfg,
    },
]
|
@ -33,7 +33,7 @@ BoolQ_datasets = [
|
|||||||
dict(
|
dict(
|
||||||
abbr='BoolQ',
|
abbr='BoolQ',
|
||||||
type=BoolQDatasetV2,
|
type=BoolQDatasetV2,
|
||||||
path='./data/SuperGLUE/BoolQ/val.jsonl',
|
path='opencompass/boolq',
|
||||||
reader_cfg=BoolQ_reader_cfg,
|
reader_cfg=BoolQ_reader_cfg,
|
||||||
infer_cfg=BoolQ_infer_cfg,
|
infer_cfg=BoolQ_infer_cfg,
|
||||||
eval_cfg=BoolQ_eval_cfg,
|
eval_cfg=BoolQ_eval_cfg,
|
||||||
|
@ -0,0 +1,43 @@
|
|||||||
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
|
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||||
|
from opencompass.openicl.icl_inferencer import PPLInferencer
|
||||||
|
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||||
|
from opencompass.datasets import BoolQDatasetV2
|
||||||
|
|
||||||
|
# Columns substituted into the prompt, and the column holding the gold answer.
BoolQ_reader_cfg = {
    'input_columns': ['question', 'passage'],
    'output_column': 'label',
}

# PPLInferencer setup: one template per candidate label ('A' and 'B'). Both
# share the same HUMAN turn and differ only in the BOT continuation
# ('Yes' vs 'No').
BoolQ_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'A': {
                'round': [
                    {'role': 'HUMAN', 'prompt': '{passage}\nQuestion: {question}?'},
                    {'role': 'BOT', 'prompt': 'Yes'},
                ],
            },
            'B': {
                'round': [
                    {'role': 'HUMAN', 'prompt': '{passage}\nQuestion: {question}?'},
                    {'role': 'BOT', 'prompt': 'No'},
                ],
            },
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': PPLInferencer},
}

# Plain accuracy; no prediction post-processing is configured here.
BoolQ_eval_cfg = {'evaluator': {'type': AccEvaluator}}

# The single dataset entry exported by this config.
BoolQ_datasets = [
    {
        'abbr': 'BoolQ',
        'type': BoolQDatasetV2,
        'path': 'opencompass/boolq',
        'reader_cfg': BoolQ_reader_cfg,
        'infer_cfg': BoolQ_infer_cfg,
        'eval_cfg': BoolQ_eval_cfg,
    },
]
|
@ -35,7 +35,7 @@ BoolQ_datasets = [
|
|||||||
dict(
|
dict(
|
||||||
abbr='BoolQ',
|
abbr='BoolQ',
|
||||||
type=BoolQDatasetV3,
|
type=BoolQDatasetV3,
|
||||||
path='./data/SuperGLUE/BoolQ/val.jsonl',
|
path='opencompass/boolq',
|
||||||
reader_cfg=BoolQ_reader_cfg,
|
reader_cfg=BoolQ_reader_cfg,
|
||||||
infer_cfg=BoolQ_infer_cfg,
|
infer_cfg=BoolQ_infer_cfg,
|
||||||
eval_cfg=BoolQ_eval_cfg,
|
eval_cfg=BoolQ_eval_cfg,
|
||||||
|
@ -36,7 +36,7 @@ BoolQ_datasets = [
|
|||||||
type=BoolQDataset,
|
type=BoolQDataset,
|
||||||
abbr='BoolQ',
|
abbr='BoolQ',
|
||||||
path='json',
|
path='json',
|
||||||
data_files='./data/SuperGLUE/BoolQ/val.jsonl',
|
data_files='opencompass/boolq',
|
||||||
split='train',
|
split='train',
|
||||||
reader_cfg=BoolQ_reader_cfg,
|
reader_cfg=BoolQ_reader_cfg,
|
||||||
infer_cfg=BoolQ_infer_cfg,
|
infer_cfg=BoolQ_infer_cfg,
|
||||||
|
@ -36,7 +36,7 @@ BoolQ_datasets = [
|
|||||||
type=BoolQDataset,
|
type=BoolQDataset,
|
||||||
abbr='BoolQ',
|
abbr='BoolQ',
|
||||||
path='json',
|
path='json',
|
||||||
data_files='./data/SuperGLUE/BoolQ/val.jsonl',
|
data_files='opencompass/boolq',
|
||||||
split='train',
|
split='train',
|
||||||
reader_cfg=BoolQ_reader_cfg,
|
reader_cfg=BoolQ_reader_cfg,
|
||||||
infer_cfg=BoolQ_infer_cfg,
|
infer_cfg=BoolQ_infer_cfg,
|
||||||
|
@ -26,7 +26,7 @@ BoolQ_datasets = [
|
|||||||
type=BoolQDataset,
|
type=BoolQDataset,
|
||||||
abbr='BoolQ',
|
abbr='BoolQ',
|
||||||
path='json',
|
path='json',
|
||||||
data_files='./data/SuperGLUE/BoolQ/val.jsonl',
|
data_files='opencompass/boolq',
|
||||||
split='train',
|
split='train',
|
||||||
reader_cfg=BoolQ_reader_cfg,
|
reader_cfg=BoolQ_reader_cfg,
|
||||||
infer_cfg=BoolQ_infer_cfg,
|
infer_cfg=BoolQ_infer_cfg,
|
||||||
|
68
opencompass/configs/datasets/race/race_cot_gen_d95929.py
Normal file
68
opencompass/configs/datasets/race/race_cot_gen_d95929.py
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
|
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||||
|
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||||
|
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||||
|
from opencompass.datasets import RaceDataset
|
||||||
|
from opencompass.utils.text_postprocessors import (
|
||||||
|
first_option_postprocess,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Zero-shot chain-of-thought prompt for RACE. The model is asked to reason
# step by step and to end its reply with a line 'ANSWER: $LETTER'.
QUERY_TEMPLATE = '\n'.join((
    # One logical line; split across literals only for readability.
    'Answer the following multiple choice question. '
    'The last line of your response should be of the following format: '
    "'ANSWER: $LETTER' (without quotes) where LETTER is one of ABCD. "
    'Think step by step before answering.',
    '',
    'Article: {article}',
    '',
    'Q: {question}',
    '',
    'A. {A}',
    'B. {B}',
    'C. {C}',
    'D. {D}',
))

# Prompt columns, gold-answer column, and the split names used as the
# train and test splits.
race_reader_cfg = {
    'input_columns': ['article', 'question', 'A', 'B', 'C', 'D'],
    'output_column': 'answer',
    'train_split': 'validation',
    'test_split': 'test',
}

# A single HUMAN turn built from QUERY_TEMPLATE, paired with ZeroRetriever
# and GenInferencer.
race_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {'role': 'HUMAN', 'prompt': QUERY_TEMPLATE},
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer},
}

# Accuracy on the BOT reply, post-processed by first_option_postprocess
# with the option set 'ABCD'.
race_eval_cfg = {
    'evaluator': {'type': AccEvaluator},
    'pred_postprocessor': {
        'type': first_option_postprocess,
        'options': 'ABCD',
    },
    'pred_role': 'BOT',
}

# Two entries sharing the same reader/infer/eval configs; they differ only in
# `abbr` and the subset `name` ('middle' vs 'high').
race_datasets = [
    {
        'abbr': 'race-middle',
        'type': RaceDataset,
        'path': 'opencompass/race',
        'name': 'middle',
        'reader_cfg': race_reader_cfg,
        'infer_cfg': race_infer_cfg,
        'eval_cfg': race_eval_cfg,
    },
    {
        'abbr': 'race-high',
        'type': RaceDataset,
        'path': 'opencompass/race',
        'name': 'high',
        'reader_cfg': race_reader_cfg,
        'infer_cfg': race_infer_cfg,
        'eval_cfg': race_eval_cfg,
    },
]
|
@ -0,0 +1,53 @@
|
|||||||
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
|
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
|
||||||
|
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||||
|
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||||
|
from opencompass.datasets import RaceDataset
|
||||||
|
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||||
|
|
||||||
|
race_reader_cfg = dict(
|
||||||
|
input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
|
||||||
|
output_column='answer',
|
||||||
|
train_split='validation',
|
||||||
|
test_split='test'
|
||||||
|
)
|
||||||
|
|
||||||
|
race_infer_cfg = dict(
|
||||||
|
ice_template=dict(
|
||||||
|
type=PromptTemplate,
|
||||||
|
template=dict(
|
||||||
|
begin='</E>',
|
||||||
|
round=[
|
||||||
|
dict(role='HUMAN', prompt='Read the article, and answer the question by replying A, B, C or D.\n\nArticle:\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer:'),
|
||||||
|
dict(role='BOT', prompt='{answer}'),
|
||||||
|
]
|
||||||
|
),
|
||||||
|
ice_token='</E>',
|
||||||
|
),
|
||||||
|
retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4]),
|
||||||
|
inferencer=dict(type=GenInferencer, max_out_len=50),
|
||||||
|
)
|
||||||
|
|
||||||
|
race_eval_cfg = dict(
|
||||||
|
evaluator=dict(type=AccEvaluator),
|
||||||
|
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||||
|
pred_role='BOT')
|
||||||
|
|
||||||
|
race_datasets = [
|
||||||
|
dict(
|
||||||
|
abbr='race-middle',
|
||||||
|
type=RaceDataset,
|
||||||
|
path='opencompass/race',
|
||||||
|
name='middle',
|
||||||
|
reader_cfg=race_reader_cfg,
|
||||||
|
infer_cfg=race_infer_cfg,
|
||||||
|
eval_cfg=race_eval_cfg),
|
||||||
|
dict(
|
||||||
|
abbr='race-high',
|
||||||
|
type=RaceDataset,
|
||||||
|
path='opencompass/race',
|
||||||
|
name='high',
|
||||||
|
reader_cfg=race_reader_cfg,
|
||||||
|
infer_cfg=race_infer_cfg,
|
||||||
|
eval_cfg=race_eval_cfg)
|
||||||
|
]
|
15
opencompass/configs/models/chatglm/lmdeploy_glm4_9b_chat.py
Normal file
15
opencompass/configs/models/chatglm/lmdeploy_glm4_9b_chat.py
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
from opencompass.models import TurboMindModelwithChatTemplate
|
||||||
|
|
||||||
|
models = [
|
||||||
|
dict(
|
||||||
|
type=TurboMindModelwithChatTemplate,
|
||||||
|
abbr='glm-4-9b-chat-turbomind',
|
||||||
|
path='THUDM/glm-4-9b-chat',
|
||||||
|
engine_config=dict(max_batch_size=16, tp=1),
|
||||||
|
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||||
|
max_seq_len=8192,
|
||||||
|
max_out_len=1024,
|
||||||
|
batch_size=16,
|
||||||
|
run_cfg=dict(num_gpus=1),
|
||||||
|
)
|
||||||
|
]
|
15
opencompass/configs/models/hf_llama/lmdeploy_llama3_1_8b.py
Normal file
15
opencompass/configs/models/hf_llama/lmdeploy_llama3_1_8b.py
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
from opencompass.models import TurboMindModel
|
||||||
|
|
||||||
|
models = [
|
||||||
|
dict(
|
||||||
|
type=TurboMindModel,
|
||||||
|
abbr='llama-3.1-8b-turbomind',
|
||||||
|
path='meta-llama/Meta-Llama-3.1-8B',
|
||||||
|
engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
|
||||||
|
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||||
|
max_seq_len=7168,
|
||||||
|
max_out_len=1024,
|
||||||
|
batch_size=16,
|
||||||
|
run_cfg=dict(num_gpus=1),
|
||||||
|
)
|
||||||
|
]
|
@ -0,0 +1,16 @@
|
|||||||
|
from opencompass.models import TurboMindModelwithChatTemplate
|
||||||
|
|
||||||
|
models = [
|
||||||
|
dict(
|
||||||
|
type=TurboMindModelwithChatTemplate,
|
||||||
|
abbr='llama-3.1-8b-instruct-turbomind',
|
||||||
|
path='meta-llama/Meta-Llama-3.1-8B-Instruct',
|
||||||
|
engine_config=dict(max_batch_size=16, tp=1),
|
||||||
|
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||||
|
max_seq_len=7168,
|
||||||
|
max_out_len=1024,
|
||||||
|
batch_size=16,
|
||||||
|
run_cfg=dict(num_gpus=1),
|
||||||
|
stop_words=['<|end_of_text|>', '<|eot_id|>'],
|
||||||
|
)
|
||||||
|
]
|
@ -31,7 +31,7 @@ class BoolQDatasetV2(BaseDataset):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def load(path):
|
def load(path):
|
||||||
path = get_data_path(path, local_mode=True)
|
path = get_data_path(path)
|
||||||
dataset = []
|
dataset = []
|
||||||
with open(path, 'r') as f:
|
with open(path, 'r') as f:
|
||||||
for line in f:
|
for line in f:
|
||||||
|
@ -4,7 +4,7 @@ from .icl_dpp_retriever import DPPRetriever # noqa
|
|||||||
from .icl_fix_k_retriever import FixKRetriever # noqa
|
from .icl_fix_k_retriever import FixKRetriever # noqa
|
||||||
from .icl_mdl_retriever import MDLRetriever # noqa
|
from .icl_mdl_retriever import MDLRetriever # noqa
|
||||||
from .icl_random_retriever import RandomRetriever # noqa
|
from .icl_random_retriever import RandomRetriever # noqa
|
||||||
|
from .icl_sliding_k_retriever import SlidingWindowRetriever # noqa
|
||||||
from .icl_topk_retriever import TopkRetriever # noqa
|
from .icl_topk_retriever import TopkRetriever # noqa
|
||||||
from .icl_votek_retriever import VotekRetriever # noqa
|
from .icl_votek_retriever import VotekRetriever # noqa
|
||||||
from .icl_zero_retriever import ZeroRetriever # noqa
|
from .icl_zero_retriever import ZeroRetriever # noqa
|
||||||
from .icl_sliding_k_retriever import SlidingWindowRetriever # noqa
|
|
||||||
|
@ -51,8 +51,8 @@ class SlidingWindowRetriever(BaseRetriever):
|
|||||||
for current_index in trange(len(self.test_ds),
|
for current_index in trange(len(self.test_ds),
|
||||||
disable=not self.is_main_process):
|
disable=not self.is_main_process):
|
||||||
if current_index < self.k:
|
if current_index < self.k:
|
||||||
"""For the first few examples,
|
"""For the first few examples, get the previous ones and pad
|
||||||
get the previous ones and pad with the last ones"""
|
with the last ones."""
|
||||||
start_index = max(0, current_index - self.k)
|
start_index = max(0, current_index - self.k)
|
||||||
previous_shots = list(range(start_index, current_index))
|
previous_shots = list(range(start_index, current_index))
|
||||||
if len(previous_shots) < self.k:
|
if len(previous_shots) < self.k:
|
||||||
|
@ -203,7 +203,7 @@ DATASETS_MAPPING = {
|
|||||||
"opencompass/race": {
|
"opencompass/race": {
|
||||||
"ms_id": "opencompass/race",
|
"ms_id": "opencompass/race",
|
||||||
"hf_id": "opencompass/race",
|
"hf_id": "opencompass/race",
|
||||||
"local": "./data/race",
|
"local": "./data/race/",
|
||||||
},
|
},
|
||||||
# SIQA
|
# SIQA
|
||||||
"opencompass/siqa": {
|
"opencompass/siqa": {
|
||||||
@ -229,6 +229,12 @@ DATASETS_MAPPING = {
|
|||||||
"hf_id": "opencompass/summedits",
|
"hf_id": "opencompass/summedits",
|
||||||
"local": "./data/summedits/summedits.jsonl",
|
"local": "./data/summedits/summedits.jsonl",
|
||||||
},
|
},
|
||||||
|
# SuperGLUE
|
||||||
|
"opencompass/boolq": {
|
||||||
|
"ms_id": "opencompass/boolq",
|
||||||
|
"hf_id": "opencompass/boolq",
|
||||||
|
"local": "./data/SuperGLUE/BoolQ/val.jsonl",
|
||||||
|
},
|
||||||
# TriviaQA
|
# TriviaQA
|
||||||
"opencompass/trivia_qa": {
|
"opencompass/trivia_qa": {
|
||||||
"ms_id": "opencompass/trivia_qa",
|
"ms_id": "opencompass/trivia_qa",
|
||||||
@ -292,10 +298,6 @@ DATASETS_URL = {
|
|||||||
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/BBH.zip",
|
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/BBH.zip",
|
||||||
"md5": "60c49f9bef5148aa7e1941328e96a554",
|
"md5": "60c49f9bef5148aa7e1941328e96a554",
|
||||||
},
|
},
|
||||||
"/mmlu/": {
|
|
||||||
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/mmlu.zip",
|
|
||||||
"md5": "761310671509a239e41c4b717f7fab9c",
|
|
||||||
},
|
|
||||||
"/compass_arena/": {
|
"/compass_arena/": {
|
||||||
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/compass_arena.zip",
|
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/compass_arena.zip",
|
||||||
"md5": "cd59b54a179d16f2a858b359b60588f6",
|
"md5": "cd59b54a179d16f2a858b359b60588f6",
|
||||||
@ -367,5 +369,17 @@ DATASETS_URL = {
|
|||||||
"FewCLUE": {
|
"FewCLUE": {
|
||||||
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/FewCLUE.zip",
|
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/FewCLUE.zip",
|
||||||
"md5": "7976e2bb0e9d885ffd3c55f7c5d4021e",
|
"md5": "7976e2bb0e9d885ffd3c55f7c5d4021e",
|
||||||
}
|
},
|
||||||
|
"/race": {
|
||||||
|
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/race.zip",
|
||||||
|
"md5": "b758251764a264746cf45749c02363f9",
|
||||||
|
},
|
||||||
|
"/ARC": {
|
||||||
|
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/ARC.zip",
|
||||||
|
"md5": "d720629b69f1a51cfe78bf65b00b44f6",
|
||||||
|
},
|
||||||
|
"/SuperGLUE": {
|
||||||
|
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/SuperGLUE.zip",
|
||||||
|
"md5": "b60904915b0b61d1a04ea52280169936",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
@ -98,6 +98,7 @@ def first_option_postprocess(text: str, options: str, cushion=True) -> str:
|
|||||||
f'答案是\s?(\S+)(?:。|$)',
|
f'答案是\s?(\S+)(?:。|$)',
|
||||||
f'答案应该是\s?(\S+)(?:。|$)',
|
f'答案应该是\s?(\S+)(?:。|$)',
|
||||||
f'答案为\s?(\S+)(?:。|$)',
|
f'答案为\s?(\S+)(?:。|$)',
|
||||||
|
f'(?i)ANSWER\s*:\s*([{options}])',
|
||||||
f'[Tt]he answer is:?\s+\(?([{options}])\)?',
|
f'[Tt]he answer is:?\s+\(?([{options}])\)?',
|
||||||
f'[Tt]he answer is option:?\s+\(?([{options}])\)?',
|
f'[Tt]he answer is option:?\s+\(?([{options}])\)?',
|
||||||
f'[Tt]he correct answer is:?\s+\(?([{options}])\)?',
|
f'[Tt]he correct answer is:?\s+\(?([{options}])\)?',
|
||||||
|
Loading…
Reference in New Issue
Block a user