From 77be07dbb5bb1721ca46029fa63bc264f3d54f63 Mon Sep 17 00:00:00 2001 From: bittersweet1999 <148421775+bittersweet1999@users.noreply.github.com> Date: Wed, 24 Jan 2024 18:15:29 +0800 Subject: [PATCH] [Fix] fix corev2 (#838) * fix corev2 * fix corev2 --- .../subjective_cmp/subjective_corev2.py | 2 +- ...e_compare.py => eval_subjective_corev2.py} | 27 +++--- configs/eval_subjective_score.py | 96 ------------------- 3 files changed, 15 insertions(+), 110 deletions(-) rename configs/{eval_subjective_compare.py => eval_subjective_corev2.py} (75%) delete mode 100644 configs/eval_subjective_score.py diff --git a/configs/datasets/subjective/subjective_cmp/subjective_corev2.py b/configs/datasets/subjective/subjective_cmp/subjective_corev2.py index aa4b38d8..cb1c7f63 100644 --- a/configs/datasets/subjective/subjective_cmp/subjective_corev2.py +++ b/configs/datasets/subjective/subjective_cmp/subjective_corev2.py @@ -12,7 +12,7 @@ subjective_reader_cfg = dict( ) subjective_all_sets = [ - "COREV2_6A_", + "COREV2_6A_all", ] diff --git a/configs/eval_subjective_compare.py b/configs/eval_subjective_corev2.py similarity index 75% rename from configs/eval_subjective_compare.py rename to configs/eval_subjective_corev2.py index f08a7ba2..b16e017b 100644 --- a/configs/eval_subjective_compare.py +++ b/configs/eval_subjective_corev2.py @@ -4,21 +4,22 @@ with read_base(): from .models.qwen.hf_qwen_14b_chat import models as hf_qwen_14b_chat from .models.chatglm.hf_chatglm3_6b import models as hf_chatglm3_6b from .models.baichuan.hf_baichuan2_7b_chat import models as hf_baichuan2_7b - from .models.hf_internlm.hf_internlm_chat_7b import models as hf_internlm_chat_7b - from .models.hf_internlm.hf_internlm_chat_20b import models as hf_internlm_chat_20b - from .datasets.subjective_cmp.subjective_corev2 import subjective_datasets + from .models.hf_internlm.hf_internlm2_chat_7b import models as internlm2_7b + from .models.hf_internlm.hf_internlm2_chat_20b import models as internlm2_20b + from .datasets.subjective.subjective_cmp.subjective_corev2 import subjective_datasets datasets = [*subjective_datasets] from opencompass.models import HuggingFaceCausalLM, HuggingFace, OpenAI -from opencompass.partitioners import NaivePartitioner +from opencompass.partitioners import NaivePartitioner, SizePartitioner from opencompass.partitioners.sub_naive import SubjectiveNaivePartitioner +from opencompass.partitioners.sub_size import SubjectiveSizePartitioner from opencompass.runners import LocalRunner from opencompass.runners import SlurmSequentialRunner from opencompass.tasks import OpenICLInferTask from opencompass.tasks.subjective_eval import SubjectiveEvalTask from opencompass.summarizers import Corev2Summarizer -models = [*hf_baichuan2_7b, *hf_chatglm3_6b, *hf_internlm_chat_20b, *hf_qwen_7b_chat, *hf_internlm_chat_7b, *hf_qwen_14b_chat] +models = [*internlm2_7b, *internlm2_20b] api_meta_template = dict( round=[ @@ -31,10 +32,10 @@ api_meta_template = dict( ) infer = dict( - partitioner=dict(type=NaivePartitioner), + partitioner=dict(type=SizePartitioner, max_task_size=500), runner=dict( type=SlurmSequentialRunner, - partition='llmeval', + partition='llm_dev2', quotatype='auto', max_num_workers=256, task=dict(type=OpenICLInferTask)), @@ -71,17 +72,17 @@ judge_model = dict( run_cfg=dict(num_gpus=1, num_procs=1), ) - eval = dict( partitioner=dict( - type=SubjectiveNaivePartitioner, + type=SubjectiveSizePartitioner, mode='m2n', - base_models = [*hf_baichuan2_7b, *hf_chatglm3_6b], - compare_models = [*hf_baichuan2_7b, *hf_qwen_7b_chat, *hf_chatglm3_6b, *hf_qwen_14b_chat] + max_task_size=500, + base_models = [*internlm2_7b], + compare_models = [*internlm2_20b] ), runner=dict( type=SlurmSequentialRunner, - partition='llmeval', + partition='llm_dev2', quotatype='auto', max_num_workers=256, task=dict( @@ -89,7 +90,7 @@ eval = dict( judge_cfg=judge_model )), ) -work_dir = './corev2/' +work_dir = 'outputs/corev2/' summarizer = dict( type=Corev2Summarizer, diff --git a/configs/eval_subjective_score.py b/configs/eval_subjective_score.py deleted file mode 100644 index ac1f0de5..00000000 --- a/configs/eval_subjective_score.py +++ /dev/null @@ -1,96 +0,0 @@ -from mmengine.config import read_base -with read_base(): - from .models.qwen.hf_qwen_7b_chat import models as hf_qwen_7b_chat - from .models.qwen.hf_qwen_14b_chat import models as hf_qwen_14b_chat - from .models.chatglm.hf_chatglm3_6b import models as hf_chatglm3_6b - from .models.baichuan.hf_baichuan2_7b_chat import models as hf_baichuan2_7b - from .models.hf_internlm.hf_internlm_chat_7b import models as hf_internlm_chat_7b - from .models.hf_internlm.hf_internlm_chat_20b import models as hf_internlm_chat_20b - from .datasets.subjective_cmp.subjective_creation import subjective_datasets - -datasets = [*subjective_datasets] - -from opencompass.models import HuggingFaceCausalLM, HuggingFace, OpenAI -from opencompass.partitioners import NaivePartitioner -from opencompass.partitioners.sub_naive import SubjectiveNaivePartitioner -from opencompass.runners import LocalRunner -from opencompass.runners import SlurmSequentialRunner -from opencompass.tasks import OpenICLInferTask -from opencompass.tasks.subjective_eval import SubjectiveEvalTask -from opencompass.summarizers import Creationv01Summarizer -models = [*hf_baichuan2_7b, *hf_chatglm3_6b, *hf_internlm_chat_20b, *hf_qwen_7b_chat, *hf_internlm_chat_7b, *hf_qwen_14b_chat] - -api_meta_template = dict( - round=[ - dict(role='HUMAN', api_role='HUMAN'), - dict(role='BOT', api_role='BOT', generate=True) - ], - reserved_roles=[ - dict(role='SYSTEM', api_role='SYSTEM'), - ], -) - -infer = dict( - partitioner=dict(type=NaivePartitioner), - runner=dict( - type=SlurmSequentialRunner, - partition='llmeval', - quotatype='auto', - max_num_workers=256, - task=dict(type=OpenICLInferTask)), -) - - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='\n<|im_start|>user\n', end='<|im_end|>'), - dict(role="BOT", begin="\n<|im_start|>assistant\n", end='<|im_end|>', generate=True), - ], -) - - -judge_model = dict( - type=HuggingFaceCausalLM, - abbr='qwen-7b-chat-hf', - path="Qwen/Qwen-7B-Chat", - tokenizer_path='Qwen/Qwen-7B-Chat', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False,), - pad_token_id=151643, - max_out_len=2048, - max_seq_len=2048, - batch_size=8, - meta_template=_meta_template, - run_cfg=dict(num_gpus=1, num_procs=1), - ) - - -eval = dict( - partitioner=dict( - type=SubjectiveNaivePartitioner, - mode='singlescore', - models = [*hf_baichuan2_7b] - ), - runner=dict( - type=SlurmSequentialRunner, - partition='llmeval', - quotatype='auto', - max_num_workers=256, - task=dict( - type=SubjectiveEvalTask, - judge_cfg=judge_model - )), -) -work_dir = './creation/' - -summarizer = dict( - type=Creationv01Summarizer, - match_method='smart', -) \ No newline at end of file