diff --git a/configs/datasets/ARC_c/ARC_c_few_shot_ppl.py b/configs/datasets/ARC_c/ARC_c_few_shot_ppl.py new file mode 100644 index 00000000..31087ce8 --- /dev/null +++ b/configs/datasets/ARC_c/ARC_c_few_shot_ppl.py @@ -0,0 +1,63 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever +from opencompass.openicl.icl_inferencer import PPLInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import ARCDataset + +ARC_c_reader_cfg = dict( + input_columns=['question', 'textA', 'textB', 'textC', 'textD'], + output_column='answerKey', +) + +ARC_c_infer_cfg = dict( + ice_template=dict( + type=PromptTemplate, + template={ + 'A': dict( + begin='', + round=[ + dict(role='HUMAN', prompt='Question: {question}\nAnswer: '), + dict(role='BOT', prompt='{textA}'), + ], + ), + 'B': dict( + begin='', + round=[ + dict(role='HUMAN', prompt='Question: {question}\nAnswer: '), + dict(role='BOT', prompt='{textB}'), + ], + ), + 'C': dict( + begin='', + round=[ + dict(role='HUMAN', prompt='Question: {question}\nAnswer: '), + dict(role='BOT', prompt='{textC}'), + ], + ), + 'D': dict( + begin='', + round=[ + dict(role='HUMAN', prompt='Question: {question}\nAnswer: '), + dict(role='BOT', prompt='{textD}'), + ], + ), + }, + ice_token='', + ), + retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]), + inferencer=dict(type=PPLInferencer), +) + +ARC_c_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) + +ARC_c_datasets = [ + dict( + type=ARCDataset, + abbr='ARC-c', + path='opencompass/ai2_arc-dev', + name='ARC-Challenge', + reader_cfg=ARC_c_reader_cfg, + infer_cfg=ARC_c_infer_cfg, + eval_cfg=ARC_c_eval_cfg, + ) +] diff --git a/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_few_shot_ppl.py b/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_few_shot_ppl.py new file mode 100644 index 00000000..66528942 --- /dev/null +++ b/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_few_shot_ppl.py @@ -0,0 +1,47 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import BoolQDatasetV2 +from opencompass.openicl.icl_inferencer import PPLInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator + +BoolQ_reader_cfg = dict( + input_columns=['question', 'passage'], + output_column='label', +) + +BoolQ_infer_cfg = dict( + ice_template=dict( + type=PromptTemplate, + template={ + 'B': dict( + round=[ + dict(role='HUMAN', prompt='{passage}\nQuestion: {question}?'), + dict(role='BOT', prompt='No'), + ] + ), + 'A': dict( + round=[ + dict(role='HUMAN', prompt='{passage}\nQuestion: {question}?'), + dict(role='BOT', prompt='Yes'), + ] + ), + }, + ice_token='', + ), + retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]), + inferencer=dict(type=PPLInferencer, max_out_len=50), +) + +BoolQ_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) + +BoolQ_datasets = [ + dict( + abbr='BoolQ', + type=BoolQDatasetV2, + path='opencompass/boolq', + reader_cfg=BoolQ_reader_cfg, + infer_cfg=BoolQ_infer_cfg, + eval_cfg=BoolQ_eval_cfg, + ) +] diff --git a/configs/datasets/race/race_few_shot_ppl.py b/configs/datasets/race/race_few_shot_ppl.py new file mode 100644 index 00000000..2fa9cd1d --- /dev/null +++ b/configs/datasets/race/race_few_shot_ppl.py @@ -0,0 +1,57 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever +from opencompass.openicl.icl_inferencer import PPLInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import RaceDataset + +race_reader_cfg = dict( + input_columns=['article', 'question', 'A', 'B', 'C', 'D'], + output_column='answer', + train_split='validation', + test_split='test', +) + +race_infer_cfg = dict( + ice_template=dict( + type=PromptTemplate, + template={ + ans: dict( + begin='', + round=[ + dict( + role='HUMAN', + prompt='Article:\n{article}\nQuestion:\n{question}\nA. {A}\nB. {B}\nC. {C}\nD. {D}', + ), + dict(role='BOT', prompt=f'Answer: {ans}'), + ], + ) + for ans in ['A', 'B', 'C', 'D'] + }, + ice_token='', + ), + retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4]), + inferencer=dict(type=PPLInferencer), +) + +race_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) + +race_datasets = [ + dict( + abbr='race-middle', + type=RaceDataset, + path='opencompass/race', + name='middle', + reader_cfg=race_reader_cfg, + infer_cfg=race_infer_cfg, + eval_cfg=race_eval_cfg, + ), + dict( + abbr='race-high', + type=RaceDataset, + path='opencompass/race', + name='high', + reader_cfg=race_reader_cfg, + infer_cfg=race_infer_cfg, + eval_cfg=race_eval_cfg, + ), +] diff --git a/configs/models/hf_llama/hf_llama3_1_70b_instruct.py b/configs/models/hf_llama/hf_llama3_1_70b_instruct.py new file mode 100644 index 00000000..4a17de93 --- /dev/null +++ b/configs/models/hf_llama/hf_llama3_1_70b_instruct.py @@ -0,0 +1,13 @@ +from opencompass.models import HuggingFacewithChatTemplate + +models = [ + dict( + type=HuggingFacewithChatTemplate, + abbr='llama-3_1-70b-instruct-hf', + path='meta-llama/Meta-Llama-3.1-70B-Instruct', + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=4), + stop_words=['<|end_of_text|>', '<|eot_id|>'], + ) +] diff --git a/configs/models/hf_llama/hf_llama3_1_8b_instruct.py b/configs/models/hf_llama/hf_llama3_1_8b_instruct.py new file mode 100644 index 00000000..3ae0d5f3 --- /dev/null +++ b/configs/models/hf_llama/hf_llama3_1_8b_instruct.py @@ -0,0 +1,13 @@ +from opencompass.models import HuggingFacewithChatTemplate + +models = [ + dict( + type=HuggingFacewithChatTemplate, + abbr='llama-3_1-8b-instruct-hf', + path='meta-llama/Meta-Llama-3.1-8B-Instruct', + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=1), + stop_words=['<|end_of_text|>', '<|eot_id|>'], + ) +] diff --git a/configs/models/hf_llama/lmdeploy_llama3_1_70b_instruct.py b/configs/models/hf_llama/lmdeploy_llama3_1_70b_instruct.py new file mode 100644 index 00000000..23f9bc2a --- /dev/null +++ b/configs/models/hf_llama/lmdeploy_llama3_1_70b_instruct.py @@ -0,0 +1,16 @@ +from opencompass.models import TurboMindModelwithChatTemplate + +models = [ + dict( + type=TurboMindModelwithChatTemplate, + abbr='llama-3_1-70b-instruct-turbomind', + path='meta-llama/Meta-Llama-3.1-70B-Instruct', + engine_config=dict(max_batch_size=16, tp=4), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), + max_seq_len=7168, + max_out_len=1024, + batch_size=16, + run_cfg=dict(num_gpus=4), + stop_words=['<|end_of_text|>', '<|eot_id|>'], + ) +] diff --git a/configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py b/configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py index b7dedb72..429dfec7 100644 --- a/configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py +++ b/configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py @@ -3,7 +3,7 @@ from opencompass.models import TurboMindModelwithChatTemplate models = [ dict( type=TurboMindModelwithChatTemplate, - abbr='llama-3.1-8b-instruct-turbomind', + abbr='llama-3_1-8b-instruct-turbomind', path='meta-llama/Meta-Llama-3.1-8B-Instruct', engine_config=dict(max_batch_size=16, tp=1), gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), diff --git a/configs/models/mistral/vllm_mixtral_large_instruct_2407.py b/configs/models/mistral/vllm_mixtral_large_instruct_2407.py new file mode 100644 index 00000000..5f67f294 --- /dev/null +++ b/configs/models/mistral/vllm_mixtral_large_instruct_2407.py @@ -0,0 +1,15 @@ +from opencompass.models import VLLMwithChatTemplate + + +models = [ + dict( + type=VLLMwithChatTemplate, + abbr='mixtral-large-instruct-2407-vllm', + path='mistralai/Mistral-Large-Instruct-2407', + model_kwargs=dict(tensor_parallel_size=8), + max_out_len=256, + batch_size=16, + generation_kwargs=dict(temperature=0), + run_cfg=dict(num_gpus=8), + ) +] diff --git a/configs/models/openbmb/hf_minicpm3_4b.py b/configs/models/openbmb/hf_minicpm3_4b.py new file mode 100644 index 00000000..730324ea --- /dev/null +++ b/configs/models/openbmb/hf_minicpm3_4b.py @@ -0,0 +1,15 @@ +from opencompass.models import HuggingFacewithChatTemplate + +models = [ + dict( + type=HuggingFacewithChatTemplate, + abbr='MiniCPM3-4B-hf', + path='openbmb/MiniCPM3-4B', + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=1), + model_kwargs=dict( + torch_dtype='torch.bfloat16', + ), + ) +] diff --git a/configs/models/phi/hf_phi_3_5_MoE_instruct.py b/configs/models/phi/hf_phi_3_5_MoE_instruct.py new file mode 100644 index 00000000..40153f8c --- /dev/null +++ b/configs/models/phi/hf_phi_3_5_MoE_instruct.py @@ -0,0 +1,12 @@ +from opencompass.models import HuggingFacewithChatTemplate + +models = [ + dict( + type=HuggingFacewithChatTemplate, + abbr='phi-3-5-MoE-instruct-hf', + path='microsoft/Phi-3.5-MoE-instruct', + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=1), + ) +] diff --git a/configs/models/phi/hf_phi_3_5_mini_instruct.py b/configs/models/phi/hf_phi_3_5_mini_instruct.py new file mode 100644 index 00000000..f02e4c6f --- /dev/null +++ b/configs/models/phi/hf_phi_3_5_mini_instruct.py @@ -0,0 +1,12 @@ +from opencompass.models import HuggingFacewithChatTemplate + +models = [ + dict( + type=HuggingFacewithChatTemplate, + abbr='phi-3-5-mini-instruct-hf', + path='microsoft/Phi-3.5-mini-instruct', + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=1), + ) +] diff --git a/configs/summarizers/groups/humanevalx.py b/configs/summarizers/groups/humanevalx.py new file mode 100644 index 00000000..e4c008be --- /dev/null +++ b/configs/summarizers/groups/humanevalx.py @@ -0,0 +1,5 @@ +humanevalx_summary_groups = [] + +_humanevalx_all = ['python', 'cpp', 'go', 'java', 'js'] +_humanevalx_all = ['humanevalx-' + d for d in _humanevalx_all] +humanevalx_summary_groups.append({'name': 'humanevalx', 'subsets': _humanevalx_all}) diff --git a/opencompass/configs/datasets/ARC_c/ARC_c_few_shot_ppl.py b/opencompass/configs/datasets/ARC_c/ARC_c_few_shot_ppl.py new file mode 100644 index 00000000..31087ce8 --- /dev/null +++ b/opencompass/configs/datasets/ARC_c/ARC_c_few_shot_ppl.py @@ -0,0 +1,63 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever +from opencompass.openicl.icl_inferencer import PPLInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import ARCDataset + +ARC_c_reader_cfg = dict( + input_columns=['question', 'textA', 'textB', 'textC', 'textD'], + output_column='answerKey', +) + +ARC_c_infer_cfg = dict( + ice_template=dict( + type=PromptTemplate, + template={ + 'A': dict( + begin='', + round=[ + dict(role='HUMAN', prompt='Question: {question}\nAnswer: '), + dict(role='BOT', prompt='{textA}'), + ], + ), + 'B': dict( + begin='', + round=[ + dict(role='HUMAN', prompt='Question: {question}\nAnswer: '), + dict(role='BOT', prompt='{textB}'), + ], + ), + 'C': dict( + begin='', + round=[ + dict(role='HUMAN', prompt='Question: {question}\nAnswer: '), + dict(role='BOT', prompt='{textC}'), + ], + ), + 'D': dict( + begin='', + round=[ + dict(role='HUMAN', prompt='Question: {question}\nAnswer: '), + dict(role='BOT', prompt='{textD}'), + ], + ), + }, + ice_token='', + ), + retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]), + inferencer=dict(type=PPLInferencer), +) + +ARC_c_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) + +ARC_c_datasets = [ + dict( + type=ARCDataset, + abbr='ARC-c', + path='opencompass/ai2_arc-dev', + name='ARC-Challenge', + reader_cfg=ARC_c_reader_cfg, + infer_cfg=ARC_c_infer_cfg, + eval_cfg=ARC_c_eval_cfg, + ) +] diff --git a/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_few_shot_ppl.py b/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_few_shot_ppl.py new file mode 100644 index 00000000..66528942 --- /dev/null +++ b/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_few_shot_ppl.py @@ -0,0 +1,47 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import BoolQDatasetV2 +from opencompass.openicl.icl_inferencer import PPLInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator + +BoolQ_reader_cfg = dict( + input_columns=['question', 'passage'], + output_column='label', +) + +BoolQ_infer_cfg = dict( + ice_template=dict( + type=PromptTemplate, + template={ + 'B': dict( + round=[ + dict(role='HUMAN', prompt='{passage}\nQuestion: {question}?'), + dict(role='BOT', prompt='No'), + ] + ), + 'A': dict( + round=[ + dict(role='HUMAN', prompt='{passage}\nQuestion: {question}?'), + dict(role='BOT', prompt='Yes'), + ] + ), + }, + ice_token='', + ), + retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]), + inferencer=dict(type=PPLInferencer, max_out_len=50), +) + +BoolQ_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) + +BoolQ_datasets = [ + dict( + abbr='BoolQ', + type=BoolQDatasetV2, + path='opencompass/boolq', + reader_cfg=BoolQ_reader_cfg, + infer_cfg=BoolQ_infer_cfg, + eval_cfg=BoolQ_eval_cfg, + ) +] diff --git a/opencompass/configs/datasets/race/race_few_shot_ppl.py b/opencompass/configs/datasets/race/race_few_shot_ppl.py new file mode 100644 index 00000000..2fa9cd1d --- /dev/null +++ b/opencompass/configs/datasets/race/race_few_shot_ppl.py @@ -0,0 +1,57 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever +from opencompass.openicl.icl_inferencer import PPLInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import RaceDataset + +race_reader_cfg = dict( + input_columns=['article', 'question', 'A', 'B', 'C', 'D'], + output_column='answer', + train_split='validation', + test_split='test', +) + +race_infer_cfg = dict( + ice_template=dict( + type=PromptTemplate, + template={ + ans: dict( + begin='', + round=[ + dict( + role='HUMAN', + prompt='Article:\n{article}\nQuestion:\n{question}\nA. {A}\nB. {B}\nC. {C}\nD. {D}', + ), + dict(role='BOT', prompt=f'Answer: {ans}'), + ], + ) + for ans in ['A', 'B', 'C', 'D'] + }, + ice_token='', + ), + retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4]), + inferencer=dict(type=PPLInferencer), +) + +race_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) + +race_datasets = [ + dict( + abbr='race-middle', + type=RaceDataset, + path='opencompass/race', + name='middle', + reader_cfg=race_reader_cfg, + infer_cfg=race_infer_cfg, + eval_cfg=race_eval_cfg, + ), + dict( + abbr='race-high', + type=RaceDataset, + path='opencompass/race', + name='high', + reader_cfg=race_reader_cfg, + infer_cfg=race_infer_cfg, + eval_cfg=race_eval_cfg, + ), +] diff --git a/opencompass/configs/models/hf_llama/hf_llama3_1_70b_instruct.py b/opencompass/configs/models/hf_llama/hf_llama3_1_70b_instruct.py new file mode 100644 index 00000000..4a17de93 --- /dev/null +++ b/opencompass/configs/models/hf_llama/hf_llama3_1_70b_instruct.py @@ -0,0 +1,13 @@ +from opencompass.models import HuggingFacewithChatTemplate + +models = [ + dict( + type=HuggingFacewithChatTemplate, + abbr='llama-3_1-70b-instruct-hf', + path='meta-llama/Meta-Llama-3.1-70B-Instruct', + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=4), + stop_words=['<|end_of_text|>', '<|eot_id|>'], + ) +] diff --git a/opencompass/configs/models/hf_llama/hf_llama3_1_8b_instruct.py b/opencompass/configs/models/hf_llama/hf_llama3_1_8b_instruct.py new file mode 100644 index 00000000..3ae0d5f3 --- /dev/null +++ b/opencompass/configs/models/hf_llama/hf_llama3_1_8b_instruct.py @@ -0,0 +1,13 @@ +from opencompass.models import HuggingFacewithChatTemplate + +models = [ + dict( + type=HuggingFacewithChatTemplate, + abbr='llama-3_1-8b-instruct-hf', + path='meta-llama/Meta-Llama-3.1-8B-Instruct', + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=1), + stop_words=['<|end_of_text|>', '<|eot_id|>'], + ) +] diff --git a/opencompass/configs/models/hf_llama/lmdeploy_llama3_1_70b_instruct.py b/opencompass/configs/models/hf_llama/lmdeploy_llama3_1_70b_instruct.py new file mode 100644 index 00000000..23f9bc2a --- /dev/null +++ b/opencompass/configs/models/hf_llama/lmdeploy_llama3_1_70b_instruct.py @@ -0,0 +1,16 @@ +from opencompass.models import TurboMindModelwithChatTemplate + +models = [ + dict( + type=TurboMindModelwithChatTemplate, + abbr='llama-3_1-70b-instruct-turbomind', + path='meta-llama/Meta-Llama-3.1-70B-Instruct', + engine_config=dict(max_batch_size=16, tp=4), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), + max_seq_len=7168, + max_out_len=1024, + batch_size=16, + run_cfg=dict(num_gpus=4), + stop_words=['<|end_of_text|>', '<|eot_id|>'], + ) +] diff --git a/opencompass/configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py b/opencompass/configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py index b7dedb72..429dfec7 100644 --- a/opencompass/configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py +++ b/opencompass/configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py @@ -3,7 +3,7 @@ from opencompass.models import TurboMindModelwithChatTemplate models = [ dict( type=TurboMindModelwithChatTemplate, - abbr='llama-3.1-8b-instruct-turbomind', + abbr='llama-3_1-8b-instruct-turbomind', path='meta-llama/Meta-Llama-3.1-8B-Instruct', engine_config=dict(max_batch_size=16, tp=1), gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), diff --git a/opencompass/configs/models/mistral/vllm_mixtral_large_instruct_2407.py b/opencompass/configs/models/mistral/vllm_mixtral_large_instruct_2407.py new file mode 100644 index 00000000..5f67f294 --- /dev/null +++ b/opencompass/configs/models/mistral/vllm_mixtral_large_instruct_2407.py @@ -0,0 +1,15 @@ +from opencompass.models import VLLMwithChatTemplate + + +models = [ + dict( + type=VLLMwithChatTemplate, + abbr='mixtral-large-instruct-2407-vllm', + path='mistralai/Mistral-Large-Instruct-2407', + model_kwargs=dict(tensor_parallel_size=8), + max_out_len=256, + batch_size=16, + generation_kwargs=dict(temperature=0), + run_cfg=dict(num_gpus=8), + ) +] diff --git a/opencompass/configs/models/openbmb/hf_minicpm3_4b.py b/opencompass/configs/models/openbmb/hf_minicpm3_4b.py new file mode 100644 index 00000000..730324ea --- /dev/null +++ b/opencompass/configs/models/openbmb/hf_minicpm3_4b.py @@ -0,0 +1,15 @@ +from opencompass.models import HuggingFacewithChatTemplate + +models = [ + dict( + type=HuggingFacewithChatTemplate, + abbr='MiniCPM3-4B-hf', + path='openbmb/MiniCPM3-4B', + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=1), + model_kwargs=dict( + torch_dtype='torch.bfloat16', + ), + ) +] diff --git a/opencompass/configs/models/phi/hf_phi_3_5_MoE_instruct.py b/opencompass/configs/models/phi/hf_phi_3_5_MoE_instruct.py new file mode 100644 index 00000000..40153f8c --- /dev/null +++ b/opencompass/configs/models/phi/hf_phi_3_5_MoE_instruct.py @@ -0,0 +1,12 @@ +from opencompass.models import HuggingFacewithChatTemplate + +models = [ + dict( + type=HuggingFacewithChatTemplate, + abbr='phi-3-5-MoE-instruct-hf', + path='microsoft/Phi-3.5-MoE-instruct', + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=1), + ) +] diff --git a/opencompass/configs/models/phi/hf_phi_3_5_mini_instruct.py b/opencompass/configs/models/phi/hf_phi_3_5_mini_instruct.py new file mode 100644 index 00000000..f02e4c6f --- /dev/null +++ b/opencompass/configs/models/phi/hf_phi_3_5_mini_instruct.py @@ -0,0 +1,12 @@ +from opencompass.models import HuggingFacewithChatTemplate + +models = [ + dict( + type=HuggingFacewithChatTemplate, + abbr='phi-3-5-mini-instruct-hf', + path='microsoft/Phi-3.5-mini-instruct', + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=1), + ) +] diff --git a/opencompass/configs/summarizers/groups/humanevalx.py b/opencompass/configs/summarizers/groups/humanevalx.py new file mode 100644 index 00000000..e4c008be --- /dev/null +++ b/opencompass/configs/summarizers/groups/humanevalx.py @@ -0,0 +1,5 @@ +humanevalx_summary_groups = [] + +_humanevalx_all = ['python', 'cpp', 'go', 'java', 'js'] +_humanevalx_all = ['humanevalx-' + d for d in _humanevalx_all] +humanevalx_summary_groups.append({'name': 'humanevalx', 'subsets': _humanevalx_all}) diff --git a/opencompass/datasets/ds1000.py b/opencompass/datasets/ds1000.py index ed55216b..2f267a15 100644 --- a/opencompass/datasets/ds1000.py +++ b/opencompass/datasets/ds1000.py @@ -366,7 +366,7 @@ class DS1000ServiceEvaluator(BaseEvaluator): def __init__(self, lib: str, ip_address='localhost', - port=5000, + port='', timeout=600) -> None: assert lib in _LIBRARY_NAME_LIST, ( f' lib must be in {_LIBRARY_NAME_LIST}') @@ -421,9 +421,14 @@ class DS1000ServiceEvaluator(BaseEvaluator): Returns: tuple[bool, str]: Whether the access is successful and the output. """ + if self.port: + eval_server_url = f'{self.ip_address}:{self.port}/evaluate' + else: + eval_server_url = f'{self.ip_address}/evaluate' + exec_result = subprocess.run([ 'curl', '-X', 'POST', '-F', f'file=@{file_path}', - f'{self.ip_address}:{self.port}/evaluate' + f'{eval_server_url}' ], timeout=self.timeout, capture_output=True) diff --git a/opencompass/datasets/humanevalx.py b/opencompass/datasets/humanevalx.py index 03901503..369df95c 100644 --- a/opencompass/datasets/humanevalx.py +++ b/opencompass/datasets/humanevalx.py @@ -76,7 +76,7 @@ class HumanevalXEvaluator(BaseEvaluator): def __init__(self, language, ip_address='localhost', - port=5000, + port='', retry=2, timeout=600) -> None: assert language in _LANGUAGE_NAME_DICT.keys(), ( @@ -141,10 +141,13 @@ class HumanevalXEvaluator(BaseEvaluator): f'\nError Information: {output}') def _code_eval_service(self, file_path): + if self.port: + eval_server_url = f'{self.ip_address}:{self.port}/evaluate' + else: + eval_server_url = f'{self.ip_address}/evaluate' exec_result = subprocess.run([ 'curl', '-X', 'POST', '-F', f'file=@{file_path}', '-F', - f'dataset=humanevalx/{self.language}', - f'{self.ip_address}:{self.port}/evaluate' + f'dataset=humanevalx/{self.language}', f'{eval_server_url}' ], timeout=self.timeout, capture_output=True) diff --git a/opencompass/models/lmdeploy_pytorch.py b/opencompass/models/lmdeploy_pytorch.py index bb195b64..80924c27 100644 --- a/opencompass/models/lmdeploy_pytorch.py +++ b/opencompass/models/lmdeploy_pytorch.py @@ -86,6 +86,8 @@ class LmdeployPytorchModel(BaseModel): for token_id in generation_config.eos_token_id: stop_words.append(token_id) gen_config.stop_words = stop_words + if version_info >= (0, 6, 0): + gen_config.stop_token_ids = stop_words self.gen_config = gen_config self.end_str = end_str self.major_version, self.minor_version = version_info[:2] diff --git a/opencompass/models/turbomind_with_tf_above_v4_33.py b/opencompass/models/turbomind_with_tf_above_v4_33.py index ccda31f6..48706671 100644 --- a/opencompass/models/turbomind_with_tf_above_v4_33.py +++ b/opencompass/models/turbomind_with_tf_above_v4_33.py @@ -126,6 +126,7 @@ class TurboMindModelwithChatTemplate(BaseModel): 'top_k': 1, 'stop_words': encode_stop_words, } + gen_config = copy.deepcopy(DEFAULT_GEN_CONFIG) gen_config.update(self.gen_config) if do_sample: @@ -134,6 +135,9 @@ class TurboMindModelwithChatTemplate(BaseModel): from lmdeploy.messages import GenerationConfig gen_config = GenerationConfig(**gen_config) + if self.version_info >= (0, 6, 0): + gen_config.stop_words = stop_words + gen_config.convert_stop_bad_words_to_ids(self.tokenizer) results = [] for batch_message in batch_messages: diff --git a/opencompass/utils/datasets_info.py b/opencompass/utils/datasets_info.py index cb8475f8..bf414bf5 100644 --- a/opencompass/utils/datasets_info.py +++ b/opencompass/utils/datasets_info.py @@ -340,6 +340,14 @@ DATASETS_URL = { "url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/humaneval.zip", "md5": "88b1b89dc47b7121c81da6bcd85a69c3", }, + "/humanevalx": { + "url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/humanevalx.zip", + "md5": "22930355c03fb73fb5bae14b50f1deb9", + }, + "/ds1000_data": { + "url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/ds1000_data.zip", + "md5": "1a4990aec04a2fd73ccfad12e2d43b43", + }, "/drop_simple_eval/": { "url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/drop_simple_eval.zip", "md5": "c912afe5b4a63509851cf16e6b91830e",