mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Feature] Evaluation Results Persistence (#1894)
* feat results_station.py * lint * feat save_to_station * feat result_station.py and lint * feat * fix * fix and lint * fix * fix subjective processing * fix * fix * style function name * lint
This commit is contained in:
parent
54324657f0
commit
1585c0adbe
@ -12,7 +12,8 @@ from mmengine.config import Config, DictAction
|
||||
from opencompass.registry import PARTITIONERS, RUNNERS, build_from_cfg
|
||||
from opencompass.runners import SlurmRunner
|
||||
from opencompass.summarizers import DefaultSummarizer
|
||||
from opencompass.utils import LarkReporter, get_logger
|
||||
from opencompass.utils import (LarkReporter, get_logger, read_from_station,
|
||||
save_to_station)
|
||||
from opencompass.utils.run import (fill_eval_cfg, fill_infer_cfg,
|
||||
get_config_from_arg)
|
||||
|
||||
@ -127,6 +128,27 @@ def parse_args():
|
||||
'correctness of each sample, bpb, etc.',
|
||||
action='store_true',
|
||||
)
|
||||
|
||||
parser.add_argument('-sp',
|
||||
'--station-path',
|
||||
help='Path to your results station.',
|
||||
type=str,
|
||||
default=None,
|
||||
)
|
||||
|
||||
parser.add_argument('--station-overwrite',
|
||||
help='Whether to overwrite the results at station.',
|
||||
action='store_true',
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--read-from-station',
|
||||
help='Whether to save the evaluation results to the '
|
||||
'data station.',
|
||||
action='store_true',
|
||||
)
|
||||
|
||||
|
||||
# set srun args
|
||||
slurm_parser = parser.add_argument_group('slurm_args')
|
||||
parse_slurm_args(slurm_parser)
|
||||
@ -260,6 +282,12 @@ def main():
|
||||
# types cannot be serialized
|
||||
cfg = Config.fromfile(output_config_path, format_python_code=False)
|
||||
|
||||
# get existed results from station
|
||||
if args.read_from_station:
|
||||
existing_results_list = read_from_station(cfg, args)
|
||||
rs_exist_results = [comb['combination'] for comb in existing_results_list]
|
||||
cfg['rs_exist_results'] = rs_exist_results
|
||||
|
||||
# report to lark bot if specify --lark
|
||||
if not args.lark:
|
||||
cfg['lark_bot_url'] = None
|
||||
@ -267,6 +295,7 @@ def main():
|
||||
content = f'{getpass.getuser()}\'s task has been launched!'
|
||||
LarkReporter(cfg['lark_bot_url']).post(content)
|
||||
|
||||
# infer
|
||||
if args.mode in ['all', 'infer']:
|
||||
# When user have specified --slurm or --dlc, or have not set
|
||||
# "infer" in config, we will provide a default configuration
|
||||
@ -348,6 +377,10 @@ def main():
|
||||
else:
|
||||
runner(tasks)
|
||||
|
||||
# save to station
|
||||
if args.station_path is not None or cfg.get('station_path') is not None:
|
||||
save_to_station(cfg, args)
|
||||
|
||||
# visualize
|
||||
if args.mode in ['all', 'eval', 'viz']:
|
||||
summarizer_cfg = cfg.get('summarizer', {})
|
||||
|
@ -102,6 +102,7 @@ class BasePartitioner:
|
||||
return tasks
|
||||
|
||||
def parse_model_dataset_args(self, cfg: ConfigDict):
|
||||
|
||||
models = cfg['models']
|
||||
datasets = cfg['datasets']
|
||||
|
||||
@ -109,7 +110,24 @@ class BasePartitioner:
|
||||
if 'model_dataset_combinations' in sig.parameters:
|
||||
combs = cfg.get('model_dataset_combinations', None)
|
||||
if combs is None:
|
||||
combs = [{'models': models, 'datasets': datasets}]
|
||||
if 'rs_exist_results' in cfg.keys():
|
||||
rs_exist_results = cfg['rs_exist_results']
|
||||
combs = []
|
||||
for model in models:
|
||||
comb = {'models': [model], 'datasets': datasets}
|
||||
combs.append(comb)
|
||||
for i in range(len(combs)):
|
||||
combs[i]['datasets'] = [
|
||||
dataset for dataset in combs[i]['datasets'] if [
|
||||
model_abbr_from_cfg(combs[i]['models'][0]),
|
||||
dataset_abbr_from_cfg(dataset)
|
||||
] not in rs_exist_results
|
||||
]
|
||||
combs = [
|
||||
comb for comb in combs if len(comb['datasets']) != 0
|
||||
]
|
||||
else:
|
||||
combs = [{'models': models, 'datasets': datasets}]
|
||||
else:
|
||||
# sanity check
|
||||
model_abbrs = [model_abbr_from_cfg(model) for model in models]
|
||||
|
@ -14,4 +14,5 @@ from .model_postprocessors import * # noqa
|
||||
from .network import * # noqa
|
||||
from .postprocessors import * # noqa
|
||||
from .prompt import * # noqa
|
||||
from .result_station import * # noqa
|
||||
from .text_postprocessors import * # noqa
|
||||
|
417
opencompass/utils/result_station.py
Normal file
417
opencompass/utils/result_station.py
Normal file
@ -0,0 +1,417 @@
|
||||
import json
|
||||
import os
|
||||
import os.path as osp
|
||||
import re
|
||||
|
||||
from opencompass.utils.abbr import (dataset_abbr_from_cfg,
|
||||
deal_with_judge_model_abbr,
|
||||
model_abbr_from_cfg)
|
||||
|
||||
|
||||
def save_to_station(cfg, args):
|
||||
|
||||
if args.station_path is not None:
|
||||
station_path = args.station_path
|
||||
else:
|
||||
station_path = cfg.get('station_path')
|
||||
|
||||
work_dict = cfg['work_dir']
|
||||
|
||||
# objective dataset processing
|
||||
if 'judge_models' not in cfg.keys():
|
||||
model_list = [model_abbr_from_cfg(model) for model in cfg['models']]
|
||||
dataset_list = [
|
||||
dataset_abbr_from_cfg(dataset) for dataset in cfg['datasets']
|
||||
]
|
||||
|
||||
rs_exist_results = []
|
||||
if 'rs_exist_results' in cfg.keys():
|
||||
rs_exist_results = cfg['rs_exist_results']
|
||||
|
||||
for dataset in dataset_list:
|
||||
result_path = osp.join(station_path, dataset)
|
||||
if not osp.exists(result_path):
|
||||
os.makedirs(result_path)
|
||||
|
||||
for model in model_list:
|
||||
if ([model, dataset] in rs_exist_results
|
||||
and not args.station_overwrite):
|
||||
continue
|
||||
result_file_name = model + '.json'
|
||||
if osp.exists(osp.join(
|
||||
result_path,
|
||||
result_file_name)) and not args.station_overwrite:
|
||||
print('result of {} with {} already exists'.format(
|
||||
dataset, model))
|
||||
continue
|
||||
else:
|
||||
# get result dict
|
||||
local_result_path = osp.join(work_dict, 'results', model)
|
||||
local_result_json = osp.join(local_result_path,
|
||||
dataset + '.json')
|
||||
if not osp.exists(local_result_json):
|
||||
if args.mode == 'viz':
|
||||
continue
|
||||
raise ValueError(
|
||||
'invalid file: {}'.format(local_result_json))
|
||||
with open(local_result_json, 'r') as f:
|
||||
this_result = json.load(f)
|
||||
f.close()
|
||||
|
||||
# get prediction list
|
||||
local_prediction_path = osp.join(work_dict, 'predictions',
|
||||
model)
|
||||
local_prediction_regex = \
|
||||
rf'^{re.escape(dataset)}(?:_\d+)?\.json$'
|
||||
local_prediction_json = find_files_by_regex(
|
||||
local_prediction_path, local_prediction_regex)
|
||||
if not check_filenames(
|
||||
dataset,
|
||||
local_prediction_json) and args.mode != 'viz':
|
||||
raise ValueError('invalid filelist: {}'.format(
|
||||
local_prediction_json))
|
||||
|
||||
this_prediction = []
|
||||
for prediction_json in local_prediction_json:
|
||||
with open(
|
||||
osp.join(local_prediction_path,
|
||||
prediction_json), 'r') as f:
|
||||
this_prediction_load_json = json.load(f)
|
||||
f.close()
|
||||
for prekey in this_prediction_load_json.keys():
|
||||
this_prediction.append(
|
||||
this_prediction_load_json[prekey])
|
||||
|
||||
# get config dict
|
||||
model_cfg = [
|
||||
i for i in cfg['models']
|
||||
if model_abbr_from_cfg(i) == model
|
||||
][0]
|
||||
dataset_cfg = [
|
||||
i for i in cfg['datasets']
|
||||
if dataset_abbr_from_cfg(i) == dataset
|
||||
][0]
|
||||
this_cfg = {'models': model_cfg, 'datasets': dataset_cfg}
|
||||
|
||||
# dict combine
|
||||
data_model_results = {
|
||||
'predictions': this_prediction,
|
||||
'results': this_result,
|
||||
'cfg': this_cfg
|
||||
}
|
||||
with open(osp.join(result_path, result_file_name),
|
||||
'w') as f:
|
||||
json.dump(data_model_results,
|
||||
f,
|
||||
ensure_ascii=False,
|
||||
indent=4)
|
||||
f.close()
|
||||
print(
|
||||
'successfully save result of {} with {} to the station'
|
||||
.format(dataset, model))
|
||||
return True
|
||||
|
||||
# subjective processing
|
||||
else:
|
||||
model_list = [model for model in cfg['models']]
|
||||
judge_list = [judge_model for judge_model in cfg['judge_models']]
|
||||
model_pair_list = [[
|
||||
deal_with_judge_model_abbr(model, judge_model)
|
||||
for judge_model in judge_list
|
||||
] for model in model_list]
|
||||
|
||||
dataset_list = [[
|
||||
dataset_abbr_from_cfg(dataset),
|
||||
[dataset_abbr_from_cfg(base) for base in dataset['base_models']]
|
||||
] if 'base_models' in dataset.keys() else
|
||||
[dataset_abbr_from_cfg(dataset), ['']]
|
||||
for dataset in cfg['datasets']]
|
||||
|
||||
rs_exist_results = []
|
||||
if 'rs_exist_results' in cfg.keys():
|
||||
rs_exist_results = cfg['rs_exist_results']
|
||||
|
||||
for pair_of_dataset_and_base in dataset_list:
|
||||
dataset, base_list = pair_of_dataset_and_base[
|
||||
0], pair_of_dataset_and_base[1]
|
||||
|
||||
result_path = osp.join(station_path, dataset)
|
||||
if not osp.exists(result_path):
|
||||
os.makedirs(result_path)
|
||||
|
||||
for base_model in base_list:
|
||||
base_model_name = base_model
|
||||
if base_model_name != '':
|
||||
base_model_name += '_'
|
||||
for model_pair_sub_list in model_pair_list:
|
||||
for model_pair in model_pair_sub_list:
|
||||
model = model_abbr_from_cfg(model_pair[0])
|
||||
model_result = model_abbr_from_cfg(model_pair)
|
||||
if ([model, dataset] in rs_exist_results
|
||||
and not args.station_overwrite):
|
||||
continue
|
||||
result_file_name = (base_model_name + model_result +
|
||||
'.json')
|
||||
if osp.exists(osp.join(result_path, result_file_name)
|
||||
) and not args.station_overwrite:
|
||||
print('{} at {} already exists'.format(
|
||||
result_file_name, result_path))
|
||||
continue
|
||||
else:
|
||||
# get result dict
|
||||
local_result_path = osp.join(
|
||||
work_dict, 'results',
|
||||
base_model_name + model_result)
|
||||
local_result_json = osp.join(
|
||||
local_result_path, dataset + '.json')
|
||||
if not osp.exists(local_result_json):
|
||||
if args.mode == 'viz':
|
||||
continue
|
||||
raise ValueError('invalid file: {}'.format(
|
||||
local_result_json))
|
||||
with open(local_result_json, 'r') as f:
|
||||
this_result = json.load(f)
|
||||
f.close()
|
||||
|
||||
# get prediction list
|
||||
local_prediction_path = osp.join(
|
||||
work_dict, 'predictions', model)
|
||||
local_prediction_regex = \
|
||||
rf'^{re.escape(dataset)}(?:_\d+)?\.json$'
|
||||
local_prediction_json = find_files_by_regex(
|
||||
local_prediction_path, local_prediction_regex)
|
||||
if not check_filenames(dataset,
|
||||
local_prediction_json
|
||||
) and args.mode != 'viz':
|
||||
raise ValueError('invalid filelist: {}'.format(
|
||||
local_prediction_json))
|
||||
|
||||
this_prediction = []
|
||||
for prediction_json in local_prediction_json:
|
||||
with open(
|
||||
osp.join(local_prediction_path,
|
||||
prediction_json), 'r') as f:
|
||||
this_prediction_load_json = json.load(f)
|
||||
f.close()
|
||||
for prekey in this_prediction_load_json.keys():
|
||||
this_prediction.append(
|
||||
this_prediction_load_json[prekey])
|
||||
|
||||
# get config dict
|
||||
model_cfg = [
|
||||
i for i in cfg['models']
|
||||
if model_abbr_from_cfg(i) == model
|
||||
][0]
|
||||
dataset_cfg = [
|
||||
i for i in cfg['datasets']
|
||||
if dataset_abbr_from_cfg(i) == dataset
|
||||
][0]
|
||||
judge_model_cfg = [
|
||||
i for i in cfg['judge_models']
|
||||
if 'judged-by--' + model_abbr_from_cfg(i) ==
|
||||
model_abbr_from_cfg(model_pair[1])
|
||||
]
|
||||
|
||||
this_cfg = {
|
||||
'models': model_cfg,
|
||||
'datasets': dataset_cfg,
|
||||
'judge_models': judge_model_cfg
|
||||
}
|
||||
|
||||
# dict combine
|
||||
data_model_results = {
|
||||
'predictions': this_prediction,
|
||||
'results': this_result,
|
||||
'cfg': this_cfg
|
||||
}
|
||||
|
||||
with open(osp.join(result_path, result_file_name),
|
||||
'w') as f:
|
||||
json.dump(data_model_results,
|
||||
f,
|
||||
ensure_ascii=False,
|
||||
indent=4)
|
||||
f.close()
|
||||
print('successfully save result: {} at {} to the'
|
||||
'station'.format(result_file_name,
|
||||
result_path))
|
||||
return True
|
||||
|
||||
|
||||
def read_from_station(cfg, args):
|
||||
|
||||
assert args.station_path is not None or cfg.get('station_path') is not None
|
||||
if args.station_path is not None:
|
||||
station_path = args.station_path
|
||||
else:
|
||||
station_path = cfg.get('station_path')
|
||||
|
||||
# objective check
|
||||
if 'judge_models' not in cfg.keys():
|
||||
model_list = [model_abbr_from_cfg(model) for model in cfg['models']]
|
||||
dataset_list = [
|
||||
dataset_abbr_from_cfg(dataset) for dataset in cfg['datasets']
|
||||
]
|
||||
|
||||
existing_results_list = []
|
||||
result_local_path = osp.join(cfg['work_dir'], 'results')
|
||||
if not osp.exists(result_local_path):
|
||||
os.makedirs(result_local_path)
|
||||
|
||||
for dataset in dataset_list:
|
||||
for model in model_list:
|
||||
result_file_path = osp.join(station_path, dataset,
|
||||
model + '.json')
|
||||
if not osp.exists(result_file_path):
|
||||
print('do not find result file: {} with {} at station'.
|
||||
format(model, dataset))
|
||||
continue
|
||||
else:
|
||||
print('find result file: {} with {} at station'.format(
|
||||
model, dataset))
|
||||
with open(result_file_path, 'r') as f:
|
||||
download_json = json.load(f)
|
||||
f.close()
|
||||
existing_results_list.append({
|
||||
'combination': [model, dataset],
|
||||
'file':
|
||||
download_json
|
||||
})
|
||||
|
||||
# save results to local
|
||||
for i in existing_results_list:
|
||||
this_result = i['file']['results']
|
||||
this_result_local_path = osp.join(result_local_path,
|
||||
i['combination'][0])
|
||||
if not osp.exists(this_result_local_path):
|
||||
os.makedirs(this_result_local_path)
|
||||
this_result_local_file_path = osp.join(
|
||||
this_result_local_path, i['combination'][1] + '.json')
|
||||
if osp.exists(this_result_local_file_path):
|
||||
continue
|
||||
with open(this_result_local_file_path, 'w') as f:
|
||||
json.dump(this_result, f, ensure_ascii=False, indent=4)
|
||||
f.close()
|
||||
|
||||
return existing_results_list
|
||||
|
||||
# subjective check
|
||||
else:
|
||||
model_list = [model for model in cfg['models']]
|
||||
judge_list = [judge_model for judge_model in cfg['judge_models']]
|
||||
model_pair_list = [[
|
||||
deal_with_judge_model_abbr(model, judge_model)
|
||||
for judge_model in judge_list
|
||||
] for model in model_list]
|
||||
|
||||
dataset_list = [[
|
||||
dataset_abbr_from_cfg(dataset),
|
||||
[dataset_abbr_from_cfg(base) for base in dataset['base_models']]
|
||||
] if 'base_models' in dataset.keys() else
|
||||
[dataset_abbr_from_cfg(dataset), ['']]
|
||||
for dataset in cfg['datasets']]
|
||||
|
||||
existing_results_list = []
|
||||
result_local_path = osp.join(cfg['work_dir'], 'results')
|
||||
if not osp.exists(result_local_path):
|
||||
os.makedirs(result_local_path)
|
||||
|
||||
for pair_of_dataset_and_base in dataset_list:
|
||||
dataset, base_list = pair_of_dataset_and_base[
|
||||
0], pair_of_dataset_and_base[1]
|
||||
|
||||
for model_pair_sub_list in model_pair_list:
|
||||
result_file_path_list_origin = []
|
||||
for model_pair in model_pair_sub_list:
|
||||
model_result = model_abbr_from_cfg(model_pair)
|
||||
for base_model in base_list:
|
||||
base_model_name = base_model
|
||||
if base_model_name != '':
|
||||
base_model_name += '_'
|
||||
|
||||
result_file_path_list_origin.append(
|
||||
osp.join(station_path, dataset,
|
||||
base_model_name + model_result + '.json'))
|
||||
|
||||
result_file_path_list = [
|
||||
result_file_path
|
||||
for result_file_path in result_file_path_list_origin
|
||||
if osp.exists(result_file_path)
|
||||
]
|
||||
model = model_abbr_from_cfg(model_pair_sub_list[0][0])
|
||||
|
||||
# save all parts of results to local
|
||||
for result_file_path in result_file_path_list:
|
||||
with open(result_file_path, 'r') as f:
|
||||
this_result = json.load(f)['results']
|
||||
f.close()
|
||||
this_result_local_path = osp.join(
|
||||
result_local_path,
|
||||
osp.splitext(osp.basename(result_file_path))[0])
|
||||
if not osp.exists(this_result_local_path):
|
||||
os.makedirs(this_result_local_path)
|
||||
this_result_local_file_path = osp.join(
|
||||
this_result_local_path, dataset + '.json')
|
||||
if osp.exists(this_result_local_file_path):
|
||||
continue
|
||||
with open(this_result_local_file_path, 'w') as f:
|
||||
json.dump(this_result, f, ensure_ascii=False, indent=4)
|
||||
f.close()
|
||||
|
||||
# check whether complete
|
||||
if len(result_file_path_list) == len(
|
||||
result_file_path_list_origin):
|
||||
print('find complete results of {} with {} at station'.
|
||||
format(model, dataset))
|
||||
existing_results_list.append({
|
||||
'combination': [model, dataset],
|
||||
'file':
|
||||
result_file_path_list
|
||||
})
|
||||
else:
|
||||
print('results of {} with {} at station is not complete'.
|
||||
format(model, dataset))
|
||||
|
||||
return existing_results_list
|
||||
|
||||
|
||||
def find_files_by_regex(directory, pattern):
|
||||
|
||||
regex = re.compile(pattern)
|
||||
|
||||
matched_files = []
|
||||
for filename in os.listdir(directory):
|
||||
if regex.match(filename):
|
||||
matched_files.append(filename)
|
||||
|
||||
return matched_files
|
||||
|
||||
|
||||
def check_filenames(x, filenames):
|
||||
|
||||
if not filenames:
|
||||
return False
|
||||
|
||||
single_pattern = re.compile(rf'^{re.escape(x)}\.json$')
|
||||
numbered_pattern = re.compile(rf'^{re.escape(x)}_(\d+)\.json$')
|
||||
|
||||
is_single = all(single_pattern.match(name) for name in filenames)
|
||||
is_numbered = all(numbered_pattern.match(name) for name in filenames)
|
||||
|
||||
if not (is_single or is_numbered):
|
||||
return False
|
||||
|
||||
if is_single:
|
||||
return len(filenames) == 1
|
||||
|
||||
if is_numbered:
|
||||
numbers = []
|
||||
for name in filenames:
|
||||
match = numbered_pattern.match(name)
|
||||
if match:
|
||||
numbers.append(int(match.group(1)))
|
||||
|
||||
if sorted(numbers) != list(range(len(numbers))):
|
||||
return False
|
||||
|
||||
return True
|
Loading…
Reference in New Issue
Block a user