mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Fix] Fix generic_llm_evaluator output_path (#1798)
* Fix output_path
* Add Logger
This commit is contained in:
parent
dc6035cfcb
commit
fc0556ec8e
@ -1,5 +1,4 @@
|
|||||||
import os.path as osp
|
import os.path as osp
|
||||||
import re
|
|
||||||
from typing import Dict, List, Optional
|
from typing import Dict, List, Optional
|
||||||
|
|
||||||
import mmengine
|
import mmengine
|
||||||
@ -13,16 +12,6 @@ from opencompass.utils import build_dataset_from_cfg, build_model_from_cfg
|
|||||||
from opencompass.utils.logging import get_logger
|
from opencompass.utils.logging import get_logger
|
||||||
|
|
||||||
|
|
||||||
def count_chinese_characters(text):
    """Count the Chinese characters contained in ``text``.

    A character is counted when it lies in the range U+4E00..U+9FFF
    (the CJK Unified Ideographs block). Every occurrence is counted
    individually, so repeated characters contribute more than once.

    Args:
        text (str): The string to scan.

    Returns:
        int: Number of matching characters; 0 for an empty string.
    """
    # Each match is a single code point, so the length of the match list
    # is the character count.
    words = re.findall(r'[\u4e00-\u9fff]', text)
    return len(words)
|
|
||||||
|
|
||||||
|
|
||||||
def count_english_words(text):
    """Count the English words contained in ``text``.

    A word is a maximal run of ASCII letters (``a-z``/``A-Z``) delimited
    by regex word boundaries on both sides. Runs that touch a digit or
    an underscore (e.g. ``abc123``) are therefore not counted, while an
    apostrophe splits a token into separate words (``it's`` -> 2).

    Args:
        text (str): The string to scan.

    Returns:
        int: Number of matching words; 0 for an empty string.
    """
    words = re.findall(r'\b[a-zA-Z]+\b', text)
    return len(words)
|
|
||||||
|
|
||||||
|
|
||||||
class GenericLLMEvaluator(BaseEvaluator):
|
class GenericLLMEvaluator(BaseEvaluator):
|
||||||
"""Generic LLM evaluator.
|
"""Generic LLM evaluator.
|
||||||
|
|
||||||
@ -47,6 +36,7 @@ class GenericLLMEvaluator(BaseEvaluator):
|
|||||||
|
|
||||||
self.logger = get_logger()
|
self.logger = get_logger()
|
||||||
self.judge_cfg = judge_cfg
|
self.judge_cfg = judge_cfg
|
||||||
|
self.output_path = ''
|
||||||
|
|
||||||
self.prompt_template = ICL_PROMPT_TEMPLATES.build(prompt_template)
|
self.prompt_template = ICL_PROMPT_TEMPLATES.build(prompt_template)
|
||||||
|
|
||||||
@ -64,6 +54,10 @@ class GenericLLMEvaluator(BaseEvaluator):
|
|||||||
out_dir, out_name = osp.split(output_path)
|
out_dir, out_name = osp.split(output_path)
|
||||||
out_name = f'{out_name}.json'
|
out_name = f'{out_name}.json'
|
||||||
|
|
||||||
|
self.logger.info(
|
||||||
|
f'Set self.output_path to {self.output_path} for current task')
|
||||||
|
assert self.output_path is not None, 'output_path is None'
|
||||||
|
|
||||||
# Build LLM Inference
|
# Build LLM Inference
|
||||||
max_out_len = self.judge_cfg.get('max_out_len', None)
|
max_out_len = self.judge_cfg.get('max_out_len', None)
|
||||||
batch_size = self.judge_cfg.get('batch_size', None)
|
batch_size = self.judge_cfg.get('batch_size', None)
|
||||||
|
@ -39,7 +39,6 @@ class SubjectiveEvalTask(BaseTask):
|
|||||||
judge_cfg = cfg.get('judge_model', None)
|
judge_cfg = cfg.get('judge_model', None)
|
||||||
meta_judge_cfg = cfg.get('meta_judge_model', None)
|
meta_judge_cfg = cfg.get('meta_judge_model', None)
|
||||||
judge_models = cfg.get('judge_models', None)
|
judge_models = cfg.get('judge_models', None)
|
||||||
keep_judger_postfix = cfg.get('keep_judger_postfix', True)
|
|
||||||
|
|
||||||
if judge_cfg is None and meta_judge_cfg is None:
|
if judge_cfg is None and meta_judge_cfg is None:
|
||||||
assert judge_cfg is not None, 'Both judge_cfg and meta_judge_cfg are None, but judge_models must be provided.'
|
assert judge_cfg is not None, 'Both judge_cfg and meta_judge_cfg are None, but judge_models must be provided.'
|
||||||
@ -57,7 +56,6 @@ class SubjectiveEvalTask(BaseTask):
|
|||||||
self.judge_models = judge_models
|
self.judge_models = judge_models
|
||||||
self.infer_order = cfg.get('infer_order')
|
self.infer_order = cfg.get('infer_order')
|
||||||
self.given_pred = cfg['datasets'][0][0].get('given_pred', [])
|
self.given_pred = cfg['datasets'][0][0].get('given_pred', [])
|
||||||
self.keep_judger_postfix = keep_judger_postfix
|
|
||||||
|
|
||||||
def get_command(self, cfg_path, template):
|
def get_command(self, cfg_path, template):
|
||||||
"""Get the command template for the task.
|
"""Get the command template for the task.
|
||||||
@ -101,11 +99,8 @@ class SubjectiveEvalTask(BaseTask):
|
|||||||
'models': self.model_cfgs,
|
'models': self.model_cfgs,
|
||||||
'datasets': self.dataset_cfgs
|
'datasets': self.dataset_cfgs
|
||||||
})
|
})
|
||||||
if self.keep_judger_postfix:
|
return self.name_prefix + task_name + \
|
||||||
return self.name_prefix + task_name + \
|
|
||||||
'--judge-by--' + model_abbr_from_cfg(self.judge_cfg)
|
'--judge-by--' + model_abbr_from_cfg(self.judge_cfg)
|
||||||
else:
|
|
||||||
return self.name_prefix + task_name
|
|
||||||
|
|
||||||
def _load_model_pred(
|
def _load_model_pred(
|
||||||
self,
|
self,
|
||||||
|
Loading…
Reference in New Issue
Block a user