Mirror of https://github.com/open-compass/opencompass.git, synced 2025-05-30 16:03:24 +08:00
[Feature] Add abbr for judgemodel in subjective evaluation (#724)

* add_judgemodel_abbr
* add judgemodel abbr

This commit is contained in:
parent b35d991786
commit fbb912ddf3
@@ -116,15 +116,16 @@ class AlignmentBenchSummarizer:
         output_dir = osp.join(osp.split(output_path)[0], f'{self.time_str}')
         mmengine.mkdir_or_exist(output_dir)
         results_folder = osp.join(work_dir, 'results')
-        fout = osp.join(output_dir, 'dimension.csv')
-        fout2 = osp.join(output_dir, 'capability.csv')

         fout_flag, fout_flag2 = 0, 0
         for subdir in os.listdir(results_folder):
             if subdir not in self.eval_model_abbrs:
                 continue
             subdir_path = os.path.join(results_folder, subdir)
             if os.path.isdir(subdir_path):
-                model = subdir
+                model, judge_model = subdir.split('_')
+                fout = osp.join(output_dir, judge_model + 'dimension.csv')
+                fout2 = osp.join(output_dir, judge_model + 'capability.csv')
                 for dataset in dataset_cfgs:
                     dataset_abbr = dataset_abbr_from_cfg(dataset)
                     filepath = os.path.join(subdir_path,
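Note on the hunk above: the summarizer now derives both the evaluated model and the judge model from the results subdirectory name, and writes one dimension/capability CSV pair per judge instead of a single fixed pair. The Creationv01Summarizer hunk further down follows the same single-model pattern. A minimal sketch of that parsing, with illustrative abbreviations (the directory and file names below are assumptions, not taken from the repository):

import os.path as osp

# Assumed results subdirectory layout: '<model_abbr>_<judge_abbr>', where the
# judge part is the 'judged-by--' abbr appended by SubjectiveEvalTask below.
# split('_') only works if neither abbr itself contains an underscore.
subdir = 'qwen-7b-chat_judged-by--gpt4'     # illustrative
output_dir = 'outputs/20231201_120000'      # illustrative

model, judge_model = subdir.split('_')
fout = osp.join(output_dir, judge_model + 'dimension.csv')
fout2 = osp.join(output_dir, judge_model + 'capability.csv')
print(fout)   # outputs/20231201_120000/judged-by--gpt4dimension.csv
print(fout2)  # outputs/20231201_120000/judged-by--gpt4capability.csv

Since the diff concatenates judge_model directly with 'dimension.csv' and 'capability.csv', the resulting file names carry no separator between the judge abbreviation and the suffix.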
@@ -75,11 +75,12 @@ class Corev2Summarizer:
         output_dir = osp.join(osp.split(output_path)[0], f'{self.time_str}')
         mmengine.mkdir_or_exist(output_dir)
         results_folder = osp.join(work_dir, 'results')
-        fout = osp.join(output_dir, 'report.csv')

         for subdir in os.listdir(results_folder):
             subdir_path = os.path.join(results_folder, subdir)
             if os.path.isdir(subdir_path):
-                model1, model2 = subdir.split('_')
+                model1, model2, judge_model = subdir.split('_')
+                fout = osp.join(output_dir, judge_model + '-report.csv')
                 for dataset in dataset_cfgs:
                     dataset_abbr = dataset_abbr_from_cfg(dataset)
                     filepath = os.path.join(subdir_path,
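Same idea for pairwise comparison results: the Corev2Summarizer subdirectory now carries three underscore-separated parts, the two compared models plus the judge. A tiny illustrative split (all names are hypothetical):

subdir = 'chatglm3-6b_internlm-chat-7b_judged-by--gpt4'   # hypothetical
model1, model2, judge_model = subdir.split('_')
# ('chatglm3-6b', 'internlm-chat-7b', 'judged-by--gpt4')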
@@ -76,11 +76,12 @@ class Creationv01Summarizer:
         output_dir = osp.join(osp.split(output_path)[0], f'{self.time_str}')
         mmengine.mkdir_or_exist(output_dir)
         results_folder = osp.join(work_dir, 'results')
-        fout = osp.join(output_dir, 'report.csv')

         for subdir in os.listdir(results_folder):
             subdir_path = os.path.join(results_folder, subdir)
             if os.path.isdir(subdir_path):
-                model = subdir
+                model, judge_model = subdir.split('_')
+                fout = osp.join(output_dir, judge_model + '-report.csv')
                 for dataset in dataset_cfgs:
                     dataset_abbr = dataset_abbr_from_cfg(dataset)
                     filepath = os.path.join(subdir_path,
@@ -69,7 +69,11 @@ class SubjectiveEvalTask(BaseTask):
             # Load Dataset
             eval_cfg = dataset_cfg.get('eval_cfg')
             output_column = dataset_cfg['reader_cfg']['output_column']

+            if type(model_cfg) == ConfigDict:
+                model_cfg = (model_cfg, )
+            model_cfg += ({
+                'abbr': 'judged-by--' + self.judge_cfg['abbr']
+            }, )
             out_path = get_infer_output_path(
                 model_cfg, dataset_cfg, osp.join(self.work_dir, 'results'))
             if osp.exists(out_path):
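This hunk is where the judge abbreviation enters the pipeline: the model config is wrapped into a tuple and an extra single-key config carrying 'judged-by--<judge abbr>' is appended before the inference output path is computed. A hedged sketch of how such a tuple of configs could collapse into one path component, assuming the path helper joins the members' abbrs with '_' (consistent with the split('_') calls in the summarizer hunks above); the helper and all names here are illustrative, not the real get_infer_output_path:

import os.path as osp

def combined_abbr(model_cfg):
    # Hypothetical stand-in: reduce a config or a tuple of configs to one
    # abbreviation by joining each member's 'abbr' with '_'.
    if isinstance(model_cfg, dict):
        return model_cfg['abbr']
    return '_'.join(cfg['abbr'] for cfg in model_cfg)

judge_cfg = {'abbr': 'gpt4'}            # illustrative judge config
model_cfg = {'abbr': 'qwen-7b-chat'}    # illustrative model config

model_cfg = (model_cfg, )
model_cfg += ({'abbr': 'judged-by--' + judge_cfg['abbr']}, )

out_path = osp.join('outputs', 'results', combined_abbr(model_cfg),
                    'alignment_bench.json')
print(out_path)  # outputs/results/qwen-7b-chat_judged-by--gpt4/alignment_bench.json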
@@ -153,7 +157,14 @@ class SubjectiveEvalTask(BaseTask):
         # Get out_path
         out_path = get_infer_output_path(model_cfg, dataset_cfg,
                                          osp.join(self.work_dir, 'results'))
-        model_preds = self._load_model_pred(model_cfg, dataset_cfg, eval_cfg)
+        new_model_cfg = []
+        for m_cfg in model_cfg:
+            if len(m_cfg) > 1:
+                new_model_cfg.append(m_cfg)
+        if len(new_model_cfg) == 1:
+            new_model_cfg = new_model_cfg[0]
+        model_preds = self._load_model_pred(new_model_cfg, dataset_cfg,
+                                            eval_cfg)
         if not self.judge_cfg:
             raise ValueError('missing "eval.runner.task.judge_cfg"')
         eval_cfg['evaluator']['judge_cfg'] = self.judge_cfg
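Because the tuple now contains that injected single-key abbr entry, the prediction loader must be handed only the real model config(s) again; the len(m_cfg) > 1 check above keeps every member with more than one key and unwraps the list when a single config remains. A self-contained sketch of the same filter on illustrative plain dicts:

model_cfg = (
    {'abbr': 'qwen-7b-chat', 'type': 'HuggingFaceCausalLM',
     'path': 'Qwen/Qwen-7B-Chat'},    # illustrative real model config
    {'abbr': 'judged-by--gpt4'},      # injected marker, one key only
)

new_model_cfg = [m_cfg for m_cfg in model_cfg if len(m_cfg) > 1]
if len(new_model_cfg) == 1:
    new_model_cfg = new_model_cfg[0]  # unwrap back to a single config dict

print(new_model_cfg['abbr'])  # qwen-7b-chat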
@@ -210,6 +221,27 @@ class SubjectiveEvalTask(BaseTask):

         return s[start:end]

+    def get_output_paths(self, file_extension: str = 'json') -> List[str]:
+        """Get the paths to the output files. Every file should exist if the
+        task succeeds.
+
+        Args:
+            file_extension (str): The file extension of the output files.
+                Default: 'json'.
+        """
+        output_paths = []
+        for model, datasets in zip(self.model_cfgs, self.dataset_cfgs):
+            for dataset in datasets:
+                if type(model) == ConfigDict:
+                    model = (model, )
+                model += ({'abbr': 'judged-by--' + self.judge_cfg['abbr']}, )
+                output_paths.append(
+                    get_infer_output_path(
+                        model, dataset,
+                        osp.join(self.work_dir, self.output_subdir),
+                        file_extension))
+        return output_paths
+

 def parse_args():
     parser = argparse.ArgumentParser(description='Score Calculator')
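The new get_output_paths mirrors the same abbr handling so the expected per-judge result files can be located for every model/dataset pair. A short usage-style sketch of the loop structure, reusing the hypothetical folder naming from the earlier sketch (abbrs, datasets, and directories are illustrative):

import os.path as osp

judge_abbr = 'judged-by--gpt4'                                   # illustrative
model_cfgs = [{'abbr': 'qwen-7b-chat'}, {'abbr': 'chatglm3-6b'}]
dataset_cfgs = [[{'abbr': 'alignment_bench'}], [{'abbr': 'alignment_bench'}]]

output_paths = []
for model, datasets in zip(model_cfgs, dataset_cfgs):
    for dataset in datasets:
        folder = model['abbr'] + '_' + judge_abbr
        output_paths.append(
            osp.join('outputs', 'results', folder, dataset['abbr'] + '.json'))

print(output_paths)
# ['outputs/results/qwen-7b-chat_judged-by--gpt4/alignment_bench.json',
#  'outputs/results/chatglm3-6b_judged-by--gpt4/alignment_bench.json']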
@@ -29,7 +29,7 @@ def extract_predictions_from_json(input_folder):

    # for prediction
    output_path = os.path.join(sub_folder, model_name + '_submission.csv')
-    with open(output_path, 'w', encoding='utf-8') as file:
+    with open(output_path, 'w', encoding='utf-8-sig') as file:
        writer = csv.writer(file)
        for ans in tqdm(all_predictions):
            writer.writerow([str(ans)])
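The last hunk switches the submission CSV from 'utf-8' to 'utf-8-sig'. The only difference is a UTF-8 byte-order mark written at the start of the file, which lets spreadsheet tools such as Excel detect the encoding and display non-ASCII predictions correctly. A minimal sketch (the file name and rows are illustrative):

import csv

rows = ['答案一', 'Answer two']  # illustrative predictions with non-ASCII text

# 'utf-8-sig' prepends the BOM b'\xef\xbb\xbf'; plain 'utf-8' writes no BOM.
with open('example_submission.csv', 'w', encoding='utf-8-sig', newline='') as file:
    writer = csv.writer(file)
    for ans in rows:
        writer.writerow([str(ans)])

with open('example_submission.csv', 'rb') as file:
    print(file.read(3))  # b'\xef\xbb\xbf'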