[Fix] fix corev2 (#838)

* fix corev2 * fix corev2
2025-05-30 16:03:24 +08:00 · 2024-01-24 18:15:29 +08:00 · 2024-01-24 18:15:29 +08:00 · 77be07dbb5
commit 77be07dbb5
parent 0991dd33a0
3 changed files with 15 additions and 110 deletions
--- a/configs/datasets/subjective/subjective_cmp/subjective_corev2.py
+++ b/configs/datasets/subjective/subjective_cmp/subjective_corev2.py
@ -12,7 +12,7 @@ subjective_reader_cfg = dict(
    )
 subjective_all_sets = [
-    "COREV2_6A_",
+    "COREV2_6A_all",
 ]
--- a/configs/eval_subjective_compare.py
+++ b/configs/eval_subjective_compare.py
@ -4,21 +4,22 @@ with read_base():
    from .models.qwen.hf_qwen_14b_chat import models as hf_qwen_14b_chat
    from .models.chatglm.hf_chatglm3_6b import models as hf_chatglm3_6b
    from .models.baichuan.hf_baichuan2_7b_chat import models as hf_baichuan2_7b
-    from .models.hf_internlm.hf_internlm_chat_7b import models as hf_internlm_chat_7b
+    from .models.hf_internlm.hf_internlm2_chat_7b import models as internlm2_7b
-    from .models.hf_internlm.hf_internlm_chat_20b import models as hf_internlm_chat_20b
+    from .models.hf_internlm.hf_internlm2_chat_20b import models as internlm2_20b
-    from .datasets.subjective_cmp.subjective_corev2 import subjective_datasets
+    from .datasets.subjective.subjective_cmp.subjective_corev2 import subjective_datasets
 datasets = [*subjective_datasets]
 from opencompass.models import HuggingFaceCausalLM, HuggingFace, OpenAI
-from opencompass.partitioners import NaivePartitioner
+from opencompass.partitioners import NaivePartitioner, SizePartitioner
 from opencompass.partitioners.sub_naive import SubjectiveNaivePartitioner
 from opencompass.partitioners.sub_size import SubjectiveSizePartitioner
 from opencompass.runners import LocalRunner
 from opencompass.runners import SlurmSequentialRunner
 from opencompass.tasks import OpenICLInferTask
 from opencompass.tasks.subjective_eval import SubjectiveEvalTask
 from opencompass.summarizers import Corev2Summarizer
-models = [*hf_baichuan2_7b, *hf_chatglm3_6b, *hf_internlm_chat_20b, *hf_qwen_7b_chat, *hf_internlm_chat_7b, *hf_qwen_14b_chat]
+models = [*internlm2_7b, *internlm2_20b]
 api_meta_template = dict(
    round=[
@ -31,10 +32,10 @@ api_meta_template = dict(
 )
 infer = dict(
-    partitioner=dict(type=NaivePartitioner),
+    partitioner=dict(type=SizePartitioner, max_task_size=500),
    runner=dict(
        type=SlurmSequentialRunner,
-        partition='llmeval',
+        partition='llm_dev2',
        quotatype='auto',
        max_num_workers=256,
        task=dict(type=OpenICLInferTask)),
@ -71,17 +72,17 @@ judge_model =    dict(
        run_cfg=dict(num_gpus=1, num_procs=1),
    )
 eval = dict(
    partitioner=dict(
-        type=SubjectiveNaivePartitioner,
+        type=SubjectiveSizePartitioner,
        mode='m2n',
-        base_models = [*hf_baichuan2_7b, *hf_chatglm3_6b],
+        max_task_size=500,
-        compare_models = [*hf_baichuan2_7b, *hf_qwen_7b_chat, *hf_chatglm3_6b, *hf_qwen_14b_chat]
+        base_models = [*internlm2_7b],
        compare_models = [*internlm2_20b]
    ),
    runner=dict(
        type=SlurmSequentialRunner,
-        partition='llmeval',
+        partition='llm_dev2',
        quotatype='auto',
        max_num_workers=256,
        task=dict(
@ -89,7 +90,7 @@ eval = dict(
        judge_cfg=judge_model
        )),
 )
-work_dir = './corev2/'
+work_dir = 'outputs/corev2/'
 summarizer = dict(
    type=Corev2Summarizer,
--- a/configs/eval_subjective_score.py
+++ b/configs/eval_subjective_score.py
@ -1,96 +0,0 @@
 from mmengine.config import read_base
 with read_base():
    from .models.qwen.hf_qwen_7b_chat import models as hf_qwen_7b_chat
    from .models.qwen.hf_qwen_14b_chat import models as hf_qwen_14b_chat
    from .models.chatglm.hf_chatglm3_6b import models as hf_chatglm3_6b
    from .models.baichuan.hf_baichuan2_7b_chat import models as hf_baichuan2_7b
    from .models.hf_internlm.hf_internlm_chat_7b import models as hf_internlm_chat_7b
    from .models.hf_internlm.hf_internlm_chat_20b import models as hf_internlm_chat_20b
    from .datasets.subjective_cmp.subjective_creation import subjective_datasets
 datasets = [*subjective_datasets]
 from opencompass.models import HuggingFaceCausalLM, HuggingFace, OpenAI
 from opencompass.partitioners import NaivePartitioner
 from opencompass.partitioners.sub_naive import SubjectiveNaivePartitioner
 from opencompass.runners import LocalRunner
 from opencompass.runners import SlurmSequentialRunner
 from opencompass.tasks import OpenICLInferTask
 from opencompass.tasks.subjective_eval import SubjectiveEvalTask
 from opencompass.summarizers import Creationv01Summarizer
 models = [*hf_baichuan2_7b, *hf_chatglm3_6b, *hf_internlm_chat_20b, *hf_qwen_7b_chat, *hf_internlm_chat_7b, *hf_qwen_14b_chat]
 api_meta_template = dict(
    round=[
        dict(role='HUMAN', api_role='HUMAN'),
        dict(role='BOT', api_role='BOT', generate=True)
    ],
    reserved_roles=[
        dict(role='SYSTEM', api_role='SYSTEM'),
    ],
 )
 infer = dict(
    partitioner=dict(type=NaivePartitioner),
    runner=dict(
        type=SlurmSequentialRunner,
        partition='llmeval',
        quotatype='auto',
        max_num_workers=256,
        task=dict(type=OpenICLInferTask)),
 )
 _meta_template = dict(
    round=[
        dict(role="HUMAN", begin='\n<|im_start|>user\n', end='<|im_end|>'),
        dict(role="BOT", begin="\n<|im_start|>assistant\n", end='<|im_end|>', generate=True),
    ],
 )
 judge_model = dict(
        type=HuggingFaceCausalLM,
        abbr='qwen-7b-chat-hf',
        path="Qwen/Qwen-7B-Chat",
        tokenizer_path='Qwen/Qwen-7B-Chat',
        model_kwargs=dict(
            device_map='auto',
            trust_remote_code=True
        ),
        tokenizer_kwargs=dict(
            padding_side='left',
            truncation_side='left',
            trust_remote_code=True,
            use_fast=False,),
        pad_token_id=151643,
        max_out_len=2048,
        max_seq_len=2048,
        batch_size=8,
        meta_template=_meta_template,
        run_cfg=dict(num_gpus=1, num_procs=1),
    )
 eval = dict(
    partitioner=dict(
        type=SubjectiveNaivePartitioner,
        mode='singlescore',
        models = [*hf_baichuan2_7b]
    ),
    runner=dict(
        type=SlurmSequentialRunner,
        partition='llmeval',
        quotatype='auto',
        max_num_workers=256,
        task=dict(
            type=SubjectiveEvalTask,
            judge_cfg=judge_model
        )),
 )
 work_dir = './creation/'
 summarizer = dict(
    type=Creationv01Summarizer,
    match_method='smart',
 )