Merge 7b47afb757 into 7a7a4517ab

2025-05-30 16:03:24 +08:00 · 2025-05-20 11:25:37 +08:00 · 2025-05-20 11:25:37 +08:00 · 4161af5d92
commit 4161af5d92
parent 7a7a4517ab 7b47afb757
4 changed files with 7 additions and 2 deletions
--- a/docs/en/advanced_guides/llm_judge.md
+++ b/docs/en/advanced_guides/llm_judge.md
@@ -194,6 +194,8 @@ eval_cfg = dict(
        dict_postprocessor=dict(type=generic_llmjudge_postprocess),
    ),
    pred_role='BOT',
+    #num_gpus=1
+    # Add this when using a local model as the judge, and set it to the number of GPUs you wish to use for evaluation.
 )

 # Dataset configuration
--- a/docs/zh_cn/advanced_guides/llm_judge.md
+++ b/docs/zh_cn/advanced_guides/llm_judge.md
@@ -194,6 +194,8 @@ eval_cfg = dict(
        dict_postprocessor=dict(type=generic_llmjudge_postprocess),
    ),
    pred_role='BOT',
+    #num_gpus=1
+    #使用本地模型评估时，设置num_gpus为希望在评估阶段使用的GPU数量
 )

 # 数据集配置
--- a/examples/eval_llm_judge.py
+++ b/examples/eval_llm_judge.py
@@ -92,6 +92,7 @@ math_eval_cfg = dict(
        judge_cfg=lmdeploy_qwen2_5_14b_instruct_model[0],
        dict_postprocessor=dict(type=generic_llmjudge_postprocess),
    ),
+    num_gpus=1 # The number of GPUs used for LLM-as-judge evaluation
 )

 # Dataset configuration
--- a/opencompass/evaluator/generic_llm_evaluator.py
+++ b/opencompass/evaluator/generic_llm_evaluator.py
@@ -10,11 +10,11 @@ from opencompass.openicl.icl_evaluator import BaseEvaluator
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.registry import (DICT_POSTPROCESSORS, ICL_PROMPT_TEMPLATES,
-                                  TEXT_POSTPROCESSORS)
+                                  TEXT_POSTPROCESSORS, ICL_EVALUATORS)
 from opencompass.utils import build_dataset_from_cfg, build_model_from_cfg
 from opencompass.utils.logging import get_logger

-
+@ICL_EVALUATORS.register_module()
 class GenericLLMEvaluator(BaseEvaluator):
    """Generic LLM evaluator.