From fc0556ec8ee9d9c5673b73fff6b14fea8a7f9bf7 Mon Sep 17 00:00:00 2001
From: Songyang Zhang
Date: Tue, 31 Dec 2024 13:05:05 +0800
Subject: [PATCH] [Fix] Fix generic_llm_evaluator output_path (#1798)

* Fix output_path

* Add Logger
---
 opencompass/evaluator/generic_llm_evaluator.py | 16 +++++-----------
 opencompass/tasks/subjective_eval.py           |  7 +------
 2 files changed, 6 insertions(+), 17 deletions(-)

diff --git a/opencompass/evaluator/generic_llm_evaluator.py b/opencompass/evaluator/generic_llm_evaluator.py
index 77b4e0e1..c248b8ec 100644
--- a/opencompass/evaluator/generic_llm_evaluator.py
+++ b/opencompass/evaluator/generic_llm_evaluator.py
@@ -1,5 +1,4 @@
 import os.path as osp
-import re
 from typing import Dict, List, Optional
 
 import mmengine
@@ -13,16 +12,6 @@ from opencompass.utils import build_dataset_from_cfg, build_model_from_cfg
 from opencompass.utils.logging import get_logger
 
 
-def count_chinese_characters(text):
-    words = re.findall(r'[\u4e00-\u9fff]', text)
-    return len(words)
-
-
-def count_english_words(text):
-    words = re.findall(r'\b[a-zA-Z]+\b', text)
-    return len(words)
-
-
 class GenericLLMEvaluator(BaseEvaluator):
     """Generic LLM evaluator.
 
@@ -47,6 +36,7 @@ class GenericLLMEvaluator(BaseEvaluator):
 
         self.logger = get_logger()
         self.judge_cfg = judge_cfg
+        self.output_path = ''
 
         self.prompt_template = ICL_PROMPT_TEMPLATES.build(prompt_template)
 
@@ -64,6 +54,10 @@ class GenericLLMEvaluator(BaseEvaluator):
         out_dir, out_name = osp.split(output_path)
         out_name = f'{out_name}.json'
 
+        self.logger.info(
+            f'Set self.output_path to {self.output_path} for current task')
+        assert self.output_path is not None, 'output_path is None'
+
         # Build LLM Inference
         max_out_len = self.judge_cfg.get('max_out_len', None)
         batch_size = self.judge_cfg.get('batch_size', None)
diff --git a/opencompass/tasks/subjective_eval.py b/opencompass/tasks/subjective_eval.py
index 17adaf3f..417c5cdb 100644
--- a/opencompass/tasks/subjective_eval.py
+++ b/opencompass/tasks/subjective_eval.py
@@ -39,7 +39,6 @@ class SubjectiveEvalTask(BaseTask):
         judge_cfg = cfg.get('judge_model', None)
         meta_judge_cfg = cfg.get('meta_judge_model', None)
         judge_models = cfg.get('judge_models', None)
-        keep_judger_postfix = cfg.get('keep_judger_postfix', True)
 
         if judge_cfg is None and meta_judge_cfg is None:
             assert judge_cfg is not None, 'Both judge_cfg and meta_judge_cfg are None, but judge_models must be provided.'
@@ -57,7 +56,6 @@ class SubjectiveEvalTask(BaseTask):
         self.judge_models = judge_models
         self.infer_order = cfg.get('infer_order')
         self.given_pred = cfg['datasets'][0][0].get('given_pred', [])
-        self.keep_judger_postfix = keep_judger_postfix
 
     def get_command(self, cfg_path, template):
         """Get the command template for the task.
@@ -101,11 +99,8 @@ class SubjectiveEvalTask(BaseTask):
             'models': self.model_cfgs,
             'datasets': self.dataset_cfgs
         })
-        if self.keep_judger_postfix:
-            return self.name_prefix + task_name + \
+        return self.name_prefix + task_name + \
                 '--judge-by--' + model_abbr_from_cfg(self.judge_cfg)
-        else:
-            return self.name_prefix + task_name
 
     def _load_model_pred(
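
Note (not part of the patch): a minimal, self-contained sketch of the output-path handling the added lines implement, using only the standard library. The names mirror the patch, but this is an illustration under those assumptions, not OpenCompass API.

    import logging
    import os.path as osp

    logger = logging.getLogger(__name__)

    def resolve_output_file(output_path: str) -> str:
        """Split an output path the way the evaluator does and log it."""
        # The patch guards against an unset path before it is used.
        assert output_path is not None, 'output_path is None'
        out_dir, out_name = osp.split(output_path)
        out_name = f'{out_name}.json'  # judge results are stored as JSON
        logger.info('Set output_path to %s for current task', output_path)
        return osp.join(out_dir, out_name)

    # e.g. resolve_output_file('outputs/judge/mytask') -> 'outputs/judge/mytask.json'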