feat result_station.py and lint

Myhs-phz 2025-02-28 06:38:25 +00:00
parent 2aaab41dc9
commit c3ad4b5603
4 changed files with 117 additions and 217 deletions

View File

@@ -12,7 +12,8 @@ from mmengine.config import Config, DictAction
 from opencompass.registry import PARTITIONERS, RUNNERS, build_from_cfg
 from opencompass.runners import SlurmRunner
 from opencompass.summarizers import DefaultSummarizer
-from opencompass.utils import LarkReporter, get_logger, Save_To_Station
+from opencompass.utils import (LarkReporter, Read_From_Station,
+                               Save_To_Station, get_logger)
 from opencompass.utils.run import (fill_eval_cfg, fill_infer_cfg,
                                    get_config_from_arg)
@@ -64,8 +65,9 @@ def parse_args():
         help='Running mode. You can choose "infer" if you '
         'only want the inference results, or "eval" if you '
         'already have the results and want to evaluate them, '
-        'or "viz" if you want to visualize the results.',
-        choices=['all', 'infer', 'eval', 'viz'],
+        'or "viz" if you want to visualize the results,'
+        'or "rs" if you want to search results from your station.',
+        choices=['all', 'infer', 'eval', 'viz', 'rs'],
         default='all',
         type=str)
     parser.add_argument('-r',
@@ -133,13 +135,7 @@ def parse_args():
         'data station.',
         action='store_true',
     )
-    parser.add_argument(
-        '--read-station',
-        help='Whether to read the evaluation results from the '
-        'data station.',
-        action='store_true',
-    )
-    parser.add_argument(
+    parser.add_argument('-sp',
         '--station-path',
         help='Path to your reuslts station.',
         type=str,
@@ -260,6 +256,8 @@ def main():
             else:
                 dirs = os.listdir(cfg.work_dir)
                 dir_time_str = sorted(dirs)[-1]
+        elif args.reuse == 'station':
+            Read_From_Station(cfg, args, dir_time_str)
         else:
             dir_time_str = args.reuse
         logger.info(f'Reusing experiements from {dir_time_str}')
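Note: the hunk above shows only the dispatch point for the new 'station' reuse value. A minimal sketch of the resulting control flow, assuming the surrounding main() logic (the 'latest' sentinel and the logger wiring are inferred, not shown in this diff):

import os

from opencompass.utils import Read_From_Station, get_logger


def resolve_reuse(cfg, args, dir_time_str):
    # Simplified mirror of the reuse handling after this commit; not the
    # actual main() body.
    logger = get_logger()
    if args.reuse == 'latest':
        # reuse the newest timestamped run directory under work_dir
        dir_time_str = sorted(os.listdir(cfg.work_dir))[-1]
    elif args.reuse == 'station':
        # restore predictions/results from the station instead
        Read_From_Station(cfg, args, dir_time_str)
    else:
        # an explicit timestamp passed via -r/--reuse
        dir_time_str = args.reuse
    logger.info(f'Reusing experiments from {dir_time_str}')
    return dir_time_str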

View File

@@ -14,5 +14,5 @@ from .model_postprocessors import *  # noqa
 from .network import *  # noqa
 from .postprocessors import *  # noqa
 from .prompt import *  # noqa
-from .text_postprocessors import *  # noqa
 from .result_station import *  # noqa
+from .text_postprocessors import *  # noqa

View File

@@ -1,27 +1,22 @@
+import json
 import os
 import os.path as osp
-from typing import List, Tuple, Union
-from mmengine.config import Config
-import json
 import re


 def Save_To_Station(cfg, args):

-    from dotenv import load_dotenv
-    load_dotenv()
-    station_path = os.getenv('RESULTS_STATION_PATH')
-    assert station_path != None or args.station_path != None
-    station_path = args.station_path if station_path == None else station_path
+    assert args.station_path is not None or 'station_path' in cfg.keys(
+    ) and cfg['station_path'] is not None
+    if 'station_path' in cfg.keys() and cfg['station_path'] is not None:
+        station_path = cfg['station_path']
+    else:
+        station_path = args.station_path

     work_dict = cfg['work_dir']
     model_list = [i['abbr'] for i in cfg['models']]
     dataset_list = [i['abbr'] for i in cfg['datasets']]

     for dataset in dataset_list:
         result_path = osp.join(station_path, dataset)
         if not osp.exists(result_path):
@@ -30,7 +25,8 @@ def Save_To_Station(cfg, args):
         for model in model_list:
             result_file_name = model + '.json'
             if osp.exists(osp.join(result_path, result_file_name)):
-                print('result of {} with {} already exists'.format(dataset, model))
+                print('result of {} with {} already exists'.format(
+                    dataset, model))
                 continue
             else:
@@ -38,35 +34,122 @@ def Save_To_Station(cfg, args):
                 local_result_path = work_dict + '/results/' + model + '/'
                 local_result_json = local_result_path + dataset + '.json'
                 if not osp.exists(local_result_json):
-                    raise ValueError('invalid file: {}'.format(local_result_json))
+                    raise ValueError(
+                        'invalid file: {}'.format(local_result_json))
                 with open(local_result_json, 'r') as f:
                     this_result = json.load(f)
                     f.close()

                 # get prediction list
-                local_prediction_path = work_dict + '/predictions/' + model + '/'
-                local_prediction_regex = rf"^{re.escape(dataset)}(?:_\d+)?\.json$"
-                local_prediction_json = find_files_by_regex(local_prediction_path, local_prediction_regex)
+                local_prediction_path = (work_dict + '/predictions/' + model +
+                                         '/')
+                local_prediction_regex = \
+                    rf'^{re.escape(dataset)}(?:_\d+)?\.json$'
+                local_prediction_json = find_files_by_regex(
+                    local_prediction_path, local_prediction_regex)
                 if not check_filenames(dataset, local_prediction_json):
-                    raise ValueError('invalid filelist: {}'.format(local_prediction_json))
+                    raise ValueError(
+                        'invalid filelist: {}'.format(local_prediction_json))

                 this_prediction = []
                 for prediction_json in local_prediction_json:
-                    with open(local_prediction_path + prediction_json, 'r') as f:
+                    with open(local_prediction_path + prediction_json,
+                              'r') as f:
                         this_prediction_load_json = json.load(f)
                         f.close()
                     for prekey in this_prediction_load_json.keys():
-                        this_prediction.append(this_prediction_load_json[prekey])
+                        this_prediction.append(
+                            this_prediction_load_json[prekey])

                 # dict combine
                 data_model_results = {
                     'predictions': this_prediction,
                     'results': this_result
                 }
                 with open(osp.join(result_path, result_file_name), 'w') as f:
-                    json.dump(data_model_results, f, ensure_ascii=False, indent=4)
+                    json.dump(data_model_results,
+                              f,
+                              ensure_ascii=False,
+                              indent=4)
                     f.close()
+                print('result of {} with {} already exists'.format(
+                    dataset, model))

     return True


+def Read_From_Station(cfg, args, dir_time_str):
+
+    assert args.station_path is not None or 'station_path' in cfg.keys(
+    ) and cfg['station_path'] is not None
+    if 'station_path' in cfg.keys() and cfg['station_path'] is not None:
+        station_path = cfg['station_path']
+    else:
+        station_path = args.station_path
+
+    work_dict = osp.join(cfg.work_dir, dir_time_str)
+    model_list = [i['abbr'] for i in cfg['models']]
+    dataset_list = [i['abbr'] for i in cfg['datasets']]
+
+    if not osp.exists(work_dict):
+        os.makedirs(work_dict)
+    local_prediction_path = osp.join(work_dict, 'predictions')
+    if not osp.exists(local_prediction_path):
+        os.makedirs(local_prediction_path)
+    local_result_path = osp.join(work_dict, 'results')
+    if not osp.exists(local_result_path):
+        os.makedirs(local_result_path)
+
+    for model in model_list:
+        for data in dataset_list:
+            result_file_path = osp.join(station_path, data, model + '.json')
+            if not osp.exists(result_file_path):
+                print('do not find result file: {} with {} at station'.format(
+                    model, data))
+                continue
+            else:
+                print('find result file: {} with {} at station'.format(
+                    model, data))
+                with open(result_file_path, 'r') as f:
+                    download_json = json.load(f)
+                    f.close()
+
+                this_local_prediction_path = osp.join(local_prediction_path,
+                                                      model)
+                if not osp.exists(this_local_prediction_path):
+                    os.makedirs(this_local_prediction_path)
+                this_local_result_path = osp.join(local_result_path, model)
+                if not osp.exists(this_local_result_path):
+                    os.makedirs(this_local_result_path)
+                this_local_prediction_path = osp.join(
+                    this_local_prediction_path, data + '.json')
+                this_local_result_path = osp.join(this_local_result_path,
+                                                  data + '.json')
+
+                download_json_prediction = download_json['predictions']
+                download_json_result = download_json['results']
+
+                # save predictions
+                local_prediction = {}
+                for i in range(len(download_json_prediction)):
+                    local_prediction[str(i)] = download_json_prediction[i]
+                with open(this_local_prediction_path, 'w') as f:
+                    json.dump(local_prediction,
+                              f,
+                              ensure_ascii=False,
+                              indent=4)
+                    f.close()
+
+                # save results
+                with open(this_local_result_path, 'w') as f:
+                    json.dump(download_json_result,
+                              f,
+                              ensure_ascii=False,
+                              indent=4)
+                    f.close()
+
+    return True
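Save_To_Station relies on a find_files_by_regex helper whose body lies outside this diff. A minimal sketch of the behavior its call site implies, assuming it returns matching file names (the sorting is an added assumption, to keep numbered shards in order):

import os
import re


def find_files_by_regex(directory, pattern):
    # Return the file names (not full paths) under `directory` whose names
    # match `pattern`; callers join them with the directory themselves.
    regex = re.compile(pattern)
    return sorted(name for name in os.listdir(directory)
                  if regex.match(name))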
@@ -87,8 +170,8 @@ def check_filenames(x, filenames):
     if not filenames:
         return False

-    single_pattern = re.compile(rf"^{re.escape(x)}\.json$")
-    numbered_pattern = re.compile(rf"^{re.escape(x)}_(\d+)\.json$")
+    single_pattern = re.compile(rf'^{re.escape(x)}\.json$')
+    numbered_pattern = re.compile(rf'^{re.escape(x)}_(\d+)\.json$')

     is_single = all(single_pattern.match(name) for name in filenames)
     is_numbered = all(numbered_pattern.match(name) for name in filenames)
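Taken together, Save_To_Station and Read_From_Station agree on a simple on-disk contract: one directory per dataset abbreviation holding one JSON per model abbreviation, bundling the flattened prediction list with the results dict. An illustrative example (the path and abbreviations are samples, not from this commit):

# <station_path>/<dataset abbr>/<model abbr>.json
station_file = '/data/results_station/mmlu_pro/deepseek-v2_5-turbomind.json'
payload = {
    # values of every key in predictions/<model>/<dataset>[_<n>].json,
    # concatenated into a single list
    'predictions': [],
    # contents of results/<model>/<dataset>.json, copied verbatim
    'results': {},
}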

View File

@@ -1,181 +0,0 @@
-import argparse
-import json
-import os
-
-import yaml
-from dotenv import load_dotenv
-
-load_dotenv()
-RESULTS_STATION_PATH = os.getenv('RESULTS_STATION_PATH')
-
-data_file_map = {
-    'ifeval': 'IFEval',
-}
-data_prefix_map = {}
-
-with open('dataset-index.yml', 'r') as f1:
-    data_list = yaml.load(f1, Loader=yaml.FullLoader)
-    f1.close()
-data_searchable_list = [next(iter(i.keys())) for i in data_list]
-
-
-def parse_args():
-    parser = argparse.ArgumentParser(description='connect to results station')
-    parser.add_argument('-sp',
-                        '--station-path',
-                        type=str,
-                        default=None,
-                        help='if no env path, use this.')
-    parser.add_argument('-p',
-                        '--my-path',
-                        type=str,
-                        default=None,
-                        help='your operation path.')
-    parser.add_argument(
-        '-op',
-        '--operation',
-        type=str,
-        default='d',
-        help='u:update, d:download, ls: show dataset and model options')
-    parser.add_argument('-d',
-                        '--dataset',
-                        type=str,
-                        default='mmlu_pro',
-                        help='target dataset name')
-    parser.add_argument('-m',
-                        '--model',
-                        type=str,
-                        default='deepseek-v2_5-turbomind',
-                        help='target model name')
-    # parser.add_argument('-all',
-    #                     '--all-transfer',
-    #                     action='store_true',
-    #                     default=False,
-    #                     help='transfer all files under the path')
-    args = parser.parse_args()
-    return args
-
-
-def read_json(path):
-    results = []
-    for i in path:
-        with open(i, 'r') as f:
-            results.append(json.load(f))
-            f.close()
-    return results
-
-
-def load_json_files_by_prefix(prefix, target_path):
-    if prefix in data_file_map.keys():
-        prefix = data_file_map[prefix]
-    result_dict = {}
-    for filename in os.listdir(target_path):
-        if filename.startswith(prefix) and filename.endswith('.json'):
-            file_path = os.path.join(target_path, filename)
-            with open(file_path, 'r', encoding='utf-8') as file:
-                json_data = json.load(file)
-                result_dict[os.path.splitext(filename)[0]] = json_data
-    return result_dict
-
-
-def main(path, mypath, args):
-    if args.dataset not in data_searchable_list:
-        raise ValueError('invalid dataset input!')
-    update_path = path + args.dataset if path[
-        -1] == '/' else path + '/' + args.dataset
-    update_filename = args.dataset + '_' + args.model + '.json'
-    update_goal = update_path + '/' + update_filename
-
-    # update from your path to result station
-    if args.operation == 'u':
-        mypath_prediction = (mypath + 'predictions/' +
-                             args.model) if mypath[-1] == '/' else (
-                                 mypath + '/predictions/' + args.model)
-        mypath_result = (mypath + 'results/' +
-                         args.model) if mypath[-1] == '/' else (mypath +
-                                                                '/results/' +
-                                                                args.model)
-        if os.path.exists(mypath_prediction) and os.path.exists(mypath_result):
-            result_dict = load_json_files_by_prefix(args.dataset,
-                                                    mypath_result)
-            prediction_list = []
-            for i in result_dict.keys():
-                prediction_dict = load_json_files_by_prefix(
-                    i, mypath_prediction)
-                for j in range(len(prediction_dict)):
-                    for k in prediction_dict[i + '_' + str(j)].keys():
-                        prediction_list.append({
-                            'prediction':
-                            prediction_dict[i + '_' + str(j)][k],
-                            'sub_category':
-                            i
-                        })
-            update_dict = {
-                'predictions': prediction_list,
-                'results': result_dict,
-            }
-
-            if not os.path.exists(update_path):
-                os.makedirs(update_path)
-            if os.path.exists(update_goal):
-                input('This result exists! Press any key to continue...')
-            with open(update_goal, 'w', encoding='utf-8') as f:
-                json.dump(update_dict, f, ensure_ascii=False, indent=4)
-                f.close()
-
-    # read from result station to your path
-    if args.operation == 'd':
-        if not os.path.exists(update_goal):
-            raise ValueError('This result does not exist!')
-        with open(update_goal, 'r', encoding='utf-8') as f:
-            results = json.load(f)
-            f.close()
-        legal_key_set = {'predictions', 'results'}
-        if set(results.keys()) == legal_key_set and isinstance(
-                results['predictions'], list) and isinstance(
-                    results['results'], dict):
-            print('Successfully download result from station!'
-                  "you've got a dict with format as follows:"
-                  "\n content['precitions', 'results']")
-        else:
-            raise ValueError('illegal format of the result!')
-        save_path = args.my_path if args.my_path[
-            -1] == '/' else args.my_path + '/'
-        save_path += args.dataset + '/'
-        if not os.path.exists(save_path):
-            os.makedirs(save_path)
-        with open(save_path + update_filename, 'w', encoding='utf-8') as f:
-            json.dump(results, f, ensure_ascii=False, indent=4)
-            f.close()
-
-
-if __name__ == '__main__':
-
-    args = parse_args()
-
-    if args.operation == 'ls':
-        print('----DATASET LIST----')
-        print(data_searchable_list)
-        print('----MODEL LIST----')
-    else:
-        if RESULTS_STATION_PATH is not None:
-            path = RESULTS_STATION_PATH
-        else:
-            path = args.station_path
-        if path is None:
-            raise ValueError('Please appoint the path of results station!')
-        if not os.path.exists(path):
-            raise ValueError('Not a valid path of results station!')
-
-        mypath = args.my_path
-        if mypath is None:
-            raise ValueError('Please appoint your own path!')
-        if not os.path.exists(mypath):
-            raise ValueError('Not a valid path of your own path!')
-
-        main(path, mypath, args)
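For reference, the deleted standalone script was driven entirely by the argparse flags above; a hypothetical download invocation (the script's real path is not shown in this commit view) could be reproduced as:

import subprocess

subprocess.run([
    'python',
    'result_station.py',  # placeholder: the actual file path is not shown
    '-op', 'd',  # download from the station
    '-d', 'mmlu_pro',  # dataset abbr (the script's default)
    '-m', 'deepseek-v2_5-turbomind',  # model abbr (the script's default)
    '-p', './my_outputs',  # your operation path (illustrative)
    '-sp', '/data/results_station',  # station path when no env var is set
])

The same functionality now lives behind the station flags added to the CLI in the first file of this commit.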