Mirror of https://github.com/open-compass/opencompass.git (synced 2025-05-30 16:03:24 +08:00)

Merge branch 'open-compass:main' into main

Commit 5c5d5c119c
configs/datasets/ARC_c/ARC_c_few_shot_ppl.py (new file, 63 lines)
@@ -0,0 +1,63 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import ARCDataset

ARC_c_reader_cfg = dict(
    input_columns=['question', 'textA', 'textB', 'textC', 'textD'],
    output_column='answerKey',
)

ARC_c_infer_cfg = dict(
    ice_template=dict(
        type=PromptTemplate,
        template={
            'A': dict(
                begin='</E>',
                round=[
                    dict(role='HUMAN', prompt='Question: {question}\nAnswer: '),
                    dict(role='BOT', prompt='{textA}'),
                ],
            ),
            'B': dict(
                begin='</E>',
                round=[
                    dict(role='HUMAN', prompt='Question: {question}\nAnswer: '),
                    dict(role='BOT', prompt='{textB}'),
                ],
            ),
            'C': dict(
                begin='</E>',
                round=[
                    dict(role='HUMAN', prompt='Question: {question}\nAnswer: '),
                    dict(role='BOT', prompt='{textC}'),
                ],
            ),
            'D': dict(
                begin='</E>',
                round=[
                    dict(role='HUMAN', prompt='Question: {question}\nAnswer: '),
                    dict(role='BOT', prompt='{textD}'),
                ],
            ),
        },
        ice_token='</E>',
    ),
    retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]),
    inferencer=dict(type=PPLInferencer),
)

ARC_c_eval_cfg = dict(evaluator=dict(type=AccEvaluator))

ARC_c_datasets = [
    dict(
        type=ARCDataset,
        abbr='ARC-c',
        path='opencompass/ai2_arc-dev',
        name='ARC-Challenge',
        reader_cfg=ARC_c_reader_cfg,
        infer_cfg=ARC_c_infer_cfg,
        eval_cfg=ARC_c_eval_cfg,
    )
]
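Note: the dataset config above only declares the 5-shot PPL setup (five fixed in-context examples via FixKRetriever); to run it, it still has to be composed into an evaluation config together with a model. A minimal sketch using the usual read_base() composition and one of the model configs added later in this commit; the entry-file name eval_demo.py is made up, and the CLI form may differ between OpenCompass versions.

# configs/eval_demo.py -- hypothetical entry config, sketch only
from mmengine.config import read_base

with read_base():
    # the 5-shot ARC-c PPL dataset list added in this commit
    from .datasets.ARC_c.ARC_c_few_shot_ppl import ARC_c_datasets
    # one of the new model configs added in this commit
    from .models.hf_llama.hf_llama3_1_8b_instruct import models

datasets = ARC_c_datasets

# then, from the repository root:
#   python run.py configs/eval_demo.py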
@@ -0,0 +1,47 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import BoolQDatasetV2
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator

BoolQ_reader_cfg = dict(
    input_columns=['question', 'passage'],
    output_column='label',
)

BoolQ_infer_cfg = dict(
    ice_template=dict(
        type=PromptTemplate,
        template={
            'B': dict(
                round=[
                    dict(role='HUMAN', prompt='{passage}\nQuestion: {question}?'),
                    dict(role='BOT', prompt='No'),
                ]
            ),
            'A': dict(
                round=[
                    dict(role='HUMAN', prompt='{passage}\nQuestion: {question}?'),
                    dict(role='BOT', prompt='Yes'),
                ]
            ),
        },
        ice_token='</E>',
    ),
    retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]),
    inferencer=dict(type=PPLInferencer, max_out_len=50),
)

BoolQ_eval_cfg = dict(evaluator=dict(type=AccEvaluator))

BoolQ_datasets = [
    dict(
        abbr='BoolQ',
        type=BoolQDatasetV2,
        path='opencompass/boolq',
        reader_cfg=BoolQ_reader_cfg,
        infer_cfg=BoolQ_infer_cfg,
        eval_cfg=BoolQ_eval_cfg,
    )
]
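Note: in the BoolQ config the template keys 'A' and 'B' match the dataset's label values, and the PPL inferencer scores both candidate completions ('Yes' / 'No'); AccEvaluator then checks whether the lowest-perplexity key equals the gold label. A tiny illustrative sketch of that selection rule (not OpenCompass code):

# Illustrative only: PPL-based answer selection reduces to an argmin over the
# candidates' negative log-likelihood under the model.
def pick_by_ppl(candidate_nll: dict) -> str:
    """Return the template key whose completion scored the lowest NLL."""
    return min(candidate_nll, key=candidate_nll.get)

assert pick_by_ppl({'A': 1.8, 'B': 2.3}) == 'A'  # 'Yes' would be chosen here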
configs/datasets/race/race_few_shot_ppl.py (new file, 57 lines)
@@ -0,0 +1,57 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import RaceDataset

race_reader_cfg = dict(
    input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
    output_column='answer',
    train_split='validation',
    test_split='test',
)

race_infer_cfg = dict(
    ice_template=dict(
        type=PromptTemplate,
        template={
            ans: dict(
                begin='</E>',
                round=[
                    dict(
                        role='HUMAN',
                        prompt='Article:\n{article}\nQuestion:\n{question}\nA. {A}\nB. {B}\nC. {C}\nD. {D}',
                    ),
                    dict(role='BOT', prompt=f'Answer: {ans}'),
                ],
            )
            for ans in ['A', 'B', 'C', 'D']
        },
        ice_token='</E>',
    ),
    retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4]),
    inferencer=dict(type=PPLInferencer),
)

race_eval_cfg = dict(evaluator=dict(type=AccEvaluator))

race_datasets = [
    dict(
        abbr='race-middle',
        type=RaceDataset,
        path='opencompass/race',
        name='middle',
        reader_cfg=race_reader_cfg,
        infer_cfg=race_infer_cfg,
        eval_cfg=race_eval_cfg,
    ),
    dict(
        abbr='race-high',
        type=RaceDataset,
        path='opencompass/race',
        name='high',
        reader_cfg=race_reader_cfg,
        infer_cfg=race_infer_cfg,
        eval_cfg=race_eval_cfg,
    ),
]
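Note: the race template is built with a dict comprehension, one keyed entry per answer letter. As a sketch, the entry generated for 'A' is shown below; the other three differ only in the BOT prompt.

# What the comprehension in race_infer_cfg expands to for the key 'A'.
expanded_A = dict(
    begin='</E>',
    round=[
        dict(
            role='HUMAN',
            prompt='Article:\n{article}\nQuestion:\n{question}\nA. {A}\nB. {B}\nC. {C}\nD. {D}',
        ),
        dict(role='BOT', prompt='Answer: A'),  # f'Answer: {ans}' with ans == 'A'
    ],
)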
configs/models/hf_llama/hf_llama3_1_70b_instruct.py (new file, 13 lines)
@@ -0,0 +1,13 @@
from opencompass.models import HuggingFacewithChatTemplate

models = [
    dict(
        type=HuggingFacewithChatTemplate,
        abbr='llama-3_1-70b-instruct-hf',
        path='meta-llama/Meta-Llama-3.1-70B-Instruct',
        max_out_len=1024,
        batch_size=8,
        run_cfg=dict(num_gpus=4),
        stop_words=['<|end_of_text|>', '<|eot_id|>'],
    )
]
configs/models/hf_llama/hf_llama3_1_8b_instruct.py (new file, 13 lines)
@@ -0,0 +1,13 @@
from opencompass.models import HuggingFacewithChatTemplate

models = [
    dict(
        type=HuggingFacewithChatTemplate,
        abbr='llama-3_1-8b-instruct-hf',
        path='meta-llama/Meta-Llama-3.1-8B-Instruct',
        max_out_len=1024,
        batch_size=8,
        run_cfg=dict(num_gpus=1),
        stop_words=['<|end_of_text|>', '<|eot_id|>'],
    )
]
configs/models/hf_llama/lmdeploy_llama3_1_70b_instruct.py (new file, 16 lines)
@@ -0,0 +1,16 @@
from opencompass.models import TurboMindModelwithChatTemplate

models = [
    dict(
        type=TurboMindModelwithChatTemplate,
        abbr='llama-3_1-70b-instruct-turbomind',
        path='meta-llama/Meta-Llama-3.1-70B-Instruct',
        engine_config=dict(max_batch_size=16, tp=4),
        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
        max_seq_len=7168,
        max_out_len=1024,
        batch_size=16,
        run_cfg=dict(num_gpus=4),
        stop_words=['<|end_of_text|>', '<|eot_id|>'],
    )
]
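Note: gen_config above pins top_k=1 with a near-zero temperature, i.e. effectively greedy decoding through the TurboMind backend. As a rough, version-dependent sketch of what those fields map to when driving lmdeploy directly (the prompt is made up):

# Sketch only: approximate lmdeploy-pipeline equivalent of the engine_config /
# gen_config fields above. Exact API details vary across lmdeploy versions.
from lmdeploy import pipeline, TurbomindEngineConfig, GenerationConfig

pipe = pipeline(
    'meta-llama/Meta-Llama-3.1-70B-Instruct',
    backend_config=TurbomindEngineConfig(max_batch_size=16, tp=4),
)
out = pipe(
    ['Question: What is 2 + 2?\nAnswer: '],  # made-up prompt
    gen_config=GenerationConfig(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
)
print(out[0].text)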
@@ -3,7 +3,7 @@ from opencompass.models import TurboMindModelwithChatTemplate
 models = [
     dict(
         type=TurboMindModelwithChatTemplate,
-        abbr='llama-3.1-8b-instruct-turbomind',
+        abbr='llama-3_1-8b-instruct-turbomind',
         path='meta-llama/Meta-Llama-3.1-8B-Instruct',
         engine_config=dict(max_batch_size=16, tp=1),
         gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
configs/models/mistral/vllm_mixtral_large_instruct_2407.py (new file, 15 lines)
@@ -0,0 +1,15 @@
from opencompass.models import VLLMwithChatTemplate


models = [
    dict(
        type=VLLMwithChatTemplate,
        abbr='mixtral-large-instruct-2407-vllm',
        path='mistralai/Mistral-Large-Instruct-2407',
        model_kwargs=dict(tensor_parallel_size=8),
        max_out_len=256,
        batch_size=16,
        generation_kwargs=dict(temperature=0),
        run_cfg=dict(num_gpus=8),
    )
]
configs/models/openbmb/hf_minicpm3_4b.py (new file, 15 lines)
@@ -0,0 +1,15 @@
from opencompass.models import HuggingFacewithChatTemplate

models = [
    dict(
        type=HuggingFacewithChatTemplate,
        abbr='MiniCPM3-4B-hf',
        path='openbmb/MiniCPM3-4B',
        max_out_len=1024,
        batch_size=8,
        run_cfg=dict(num_gpus=1),
        model_kwargs=dict(
            torch_dtype='torch.bfloat16',
        ),
    )
]
configs/models/phi/hf_phi_3_5_MoE_instruct.py (new file, 12 lines)
@@ -0,0 +1,12 @@
from opencompass.models import HuggingFacewithChatTemplate

models = [
    dict(
        type=HuggingFacewithChatTemplate,
        abbr='phi-3-5-MoE-instruct-hf',
        path='microsoft/Phi-3.5-MoE-instruct',
        max_out_len=1024,
        batch_size=8,
        run_cfg=dict(num_gpus=1),
    )
]
configs/models/phi/hf_phi_3_5_mini_instruct.py (new file, 12 lines)
@@ -0,0 +1,12 @@
from opencompass.models import HuggingFacewithChatTemplate

models = [
    dict(
        type=HuggingFacewithChatTemplate,
        abbr='phi-3-5-mini-instruct-hf',
        path='microsoft/Phi-3.5-mini-instruct',
        max_out_len=1024,
        batch_size=8,
        run_cfg=dict(num_gpus=1),
    )
]
configs/summarizers/groups/humanevalx.py (new file, 5 lines)
@@ -0,0 +1,5 @@
humanevalx_summary_groups = []

_humanevalx_all = ['python', 'cpp', 'go', 'java', 'js']
_humanevalx_all = ['humanevalx-' + d for d in _humanevalx_all]
humanevalx_summary_groups.append({'name': 'humanevalx', 'subsets': _humanevalx_all})
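Note: this file only defines a summary group (an aggregate 'humanevalx' row over the five language subsets); a summarizer config still has to pick it up. A minimal sketch following the usual OpenCompass convention of gathering *_summary_groups from locals(); the surrounding file layout is assumed.

# Sketch only: consuming the new humanevalx summary group in a summarizer config.
from mmengine.config import read_base

with read_base():
    from .groups.humanevalx import humanevalx_summary_groups  # noqa: F401

summarizer = dict(
    dataset_abbrs=['humanevalx'],  # report the aggregated group score
    summary_groups=sum(
        [v for k, v in locals().items() if k.endswith('_summary_groups')], []),
)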
opencompass/configs/datasets/ARC_c/ARC_c_few_shot_ppl.py (new file, 63 lines)
(content identical to configs/datasets/ARC_c/ARC_c_few_shot_ppl.py above)
@@ -0,0 +1,47 @@
(new 47-line file; content identical to the BoolQ few-shot PPL config shown earlier)
opencompass/configs/datasets/race/race_few_shot_ppl.py (new file, 57 lines)
(content identical to configs/datasets/race/race_few_shot_ppl.py above)
@@ -0,0 +1,13 @@
(new 13-line file; content identical to configs/models/hf_llama/hf_llama3_1_70b_instruct.py above)
@@ -0,0 +1,13 @@
(new 13-line file; content identical to configs/models/hf_llama/hf_llama3_1_8b_instruct.py above)
@@ -0,0 +1,16 @@
(new 16-line file; content identical to configs/models/hf_llama/lmdeploy_llama3_1_70b_instruct.py above)
@@ -3,7 +3,7 @@ from opencompass.models import TurboMindModelwithChatTemplate
(same abbr rename as the hunk shown earlier: 'llama-3.1-8b-instruct-turbomind' -> 'llama-3_1-8b-instruct-turbomind')
@@ -0,0 +1,15 @@
(new 15-line file; content identical to configs/models/mistral/vllm_mixtral_large_instruct_2407.py above)
opencompass/configs/models/openbmb/hf_minicpm3_4b.py (new file, 15 lines)
(content identical to configs/models/openbmb/hf_minicpm3_4b.py above)
opencompass/configs/models/phi/hf_phi_3_5_MoE_instruct.py (new file, 12 lines)
(content identical to configs/models/phi/hf_phi_3_5_MoE_instruct.py above)
opencompass/configs/models/phi/hf_phi_3_5_mini_instruct.py (new file, 12 lines)
(content identical to configs/models/phi/hf_phi_3_5_mini_instruct.py above)
opencompass/configs/summarizers/groups/humanevalx.py (new file, 5 lines)
(content identical to configs/summarizers/groups/humanevalx.py above)
@@ -366,7 +366,7 @@ class DS1000ServiceEvaluator(BaseEvaluator):
     def __init__(self,
                  lib: str,
                  ip_address='localhost',
-                 port=5000,
+                 port='',
                  timeout=600) -> None:
         assert lib in _LIBRARY_NAME_LIST, (
             f' lib must be in {_LIBRARY_NAME_LIST}')
@@ -421,9 +421,14 @@ class DS1000ServiceEvaluator(BaseEvaluator):
         Returns:
             tuple[bool, str]: Whether the access is successful and the output.
         """
+        if self.port:
+            eval_server_url = f'{self.ip_address}:{self.port}/evaluate'
+        else:
+            eval_server_url = f'{self.ip_address}/evaluate'
+
         exec_result = subprocess.run([
             'curl', '-X', 'POST', '-F', f'file=@{file_path}',
-            f'{self.ip_address}:{self.port}/evaluate'
+            f'{eval_server_url}'
         ],
                                      timeout=self.timeout,
                                      capture_output=True)
@@ -76,7 +76,7 @@ class HumanevalXEvaluator(BaseEvaluator):
     def __init__(self,
                  language,
                  ip_address='localhost',
-                 port=5000,
+                 port='',
                  retry=2,
                  timeout=600) -> None:
         assert language in _LANGUAGE_NAME_DICT.keys(), (
@@ -141,10 +141,13 @@ class HumanevalXEvaluator(BaseEvaluator):
                 f'\nError Information: {output}')
 
     def _code_eval_service(self, file_path):
+        if self.port:
+            eval_server_url = f'{self.ip_address}:{self.port}/evaluate'
+        else:
+            eval_server_url = f'{self.ip_address}/evaluate'
         exec_result = subprocess.run([
             'curl', '-X', 'POST', '-F', f'file=@{file_path}', '-F',
-            f'dataset=humanevalx/{self.language}',
-            f'{self.ip_address}:{self.port}/evaluate'
+            f'dataset=humanevalx/{self.language}', f'{eval_server_url}'
         ],
                                      timeout=self.timeout,
                                      capture_output=True)
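Note: the DS1000ServiceEvaluator and HumanevalXEvaluator changes above make the port optional; with an empty port the curl target becomes '{ip_address}/evaluate', so ip_address can carry a full host or proxied path. A hedged sketch of an eval_cfg using the new behaviour; the hostname is a placeholder and the import path is assumed.

# Sketch only: pointing the code-eval service at an endpoint without an explicit port.
from opencompass.datasets import HumanevalXEvaluator  # import path assumed

humanevalx_eval_cfg = dict(
    evaluator=dict(
        type=HumanevalXEvaluator,
        language='python',
        ip_address='code-eval.internal.example.com/humanevalx',  # made-up endpoint
        port='',  # empty -> the evaluator now targets f'{ip_address}/evaluate'
    ),
)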
@@ -86,6 +86,8 @@ class LmdeployPytorchModel(BaseModel):
             for token_id in generation_config.eos_token_id:
                 stop_words.append(token_id)
         gen_config.stop_words = stop_words
+        if version_info >= (0, 6, 0):
+            gen_config.stop_token_ids = stop_words
         self.gen_config = gen_config
         self.end_str = end_str
         self.major_version, self.minor_version = version_info[:2]
@@ -126,6 +126,7 @@ class TurboMindModelwithChatTemplate(BaseModel):
             'top_k': 1,
             'stop_words': encode_stop_words,
         }
+
         gen_config = copy.deepcopy(DEFAULT_GEN_CONFIG)
         gen_config.update(self.gen_config)
         if do_sample:
@@ -134,6 +135,9 @@ class TurboMindModelwithChatTemplate(BaseModel):
 
         from lmdeploy.messages import GenerationConfig
         gen_config = GenerationConfig(**gen_config)
+        if self.version_info >= (0, 6, 0):
+            gen_config.stop_words = stop_words
+            gen_config.convert_stop_bad_words_to_ids(self.tokenizer)
 
         results = []
         for batch_message in batch_messages:
@@ -340,6 +340,14 @@ DATASETS_URL = {
         "url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/humaneval.zip",
         "md5": "88b1b89dc47b7121c81da6bcd85a69c3",
     },
+    "/humanevalx": {
+        "url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/humanevalx.zip",
+        "md5": "22930355c03fb73fb5bae14b50f1deb9",
+    },
+    "/ds1000_data": {
+        "url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/ds1000_data.zip",
+        "md5": "1a4990aec04a2fd73ccfad12e2d43b43",
+    },
     "/drop_simple_eval/": {
         "url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/drop_simple_eval.zip",
         "md5": "c912afe5b4a63509851cf16e6b91830e",
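Note: the two new DATASETS_URL entries register download metadata for the HumanEvalX and DS-1000 archives. The table appears to be keyed by a path fragment, so a dataset whose path contains the key can be fetched from the listed URL and checked against the md5. A purely illustrative sketch of that keyed-lookup idea (not OpenCompass's actual helper; names are made up):

# Illustrative only: resolving a dataset path against a fragment-keyed table.
SAMPLE_URLS = {
    '/humanevalx': {'md5': '22930355c03fb73fb5bae14b50f1deb9'},
    '/ds1000_data': {'md5': '1a4990aec04a2fd73ccfad12e2d43b43'},
}

def resolve(dataset_path: str) -> dict:
    for fragment, meta in SAMPLE_URLS.items():
        if fragment in dataset_path:
            return meta
    raise KeyError(f'no download entry matches {dataset_path!r}')

print(resolve('data/humanevalx/python')['md5'])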