diff --git a/docs/en/advanced_guides/llm_judge.md b/docs/en/advanced_guides/llm_judge.md index f7e09d78..ca75adad 100644 --- a/docs/en/advanced_guides/llm_judge.md +++ b/docs/en/advanced_guides/llm_judge.md @@ -194,6 +194,8 @@ eval_cfg = dict( dict_postprocessor=dict(type=generic_llmjudge_postprocess), ), pred_role='BOT', + #num_gpus=1 + #Add this when using a local model as the judge; set it to the number of GPUs you wish to use for evaluation. ) # Dataset configuration diff --git a/docs/zh_cn/advanced_guides/llm_judge.md b/docs/zh_cn/advanced_guides/llm_judge.md index 3cf9619b..38a1f34a 100644 --- a/docs/zh_cn/advanced_guides/llm_judge.md +++ b/docs/zh_cn/advanced_guides/llm_judge.md @@ -194,6 +194,8 @@ eval_cfg = dict( dict_postprocessor=dict(type=generic_llmjudge_postprocess), ), pred_role='BOT', + #num_gpus=1 + #使用本地模型评估时,设置num_gpus为希望在评估阶段使用的GPU数量 ) # 数据集配置 diff --git a/examples/eval_llm_judge.py b/examples/eval_llm_judge.py index b7e18463..2436f3c9 100644 --- a/examples/eval_llm_judge.py +++ b/examples/eval_llm_judge.py @@ -92,6 +92,7 @@ math_eval_cfg = dict( judge_cfg=lmdeploy_qwen2_5_14b_instruct_model[0], dict_postprocessor=dict(type=generic_llmjudge_postprocess), ), + num_gpus=1 # Number of GPUs used for LLM-as-judge evaluation ) # Dataset configuration diff --git a/opencompass/evaluator/generic_llm_evaluator.py b/opencompass/evaluator/generic_llm_evaluator.py index c205ec4b..c904c805 100644 --- a/opencompass/evaluator/generic_llm_evaluator.py +++ b/opencompass/evaluator/generic_llm_evaluator.py @@ -10,11 +10,11 @@ from opencompass.openicl.icl_evaluator import BaseEvaluator from opencompass.openicl.icl_inferencer import GenInferencer from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.registry import (DICT_POSTPROCESSORS, ICL_PROMPT_TEMPLATES, - TEXT_POSTPROCESSORS) + TEXT_POSTPROCESSORS, ICL_EVALUATORS) from opencompass.utils import build_dataset_from_cfg, build_model_from_cfg from opencompass.utils.logging import get_logger - 
+@ICL_EVALUATORS.register_module() class GenericLLMEvaluator(BaseEvaluator): """Generic LLM evaluator.