OpenCompass/opencompass/summarizers/subjective/utils.py

# flake8: noqa: E501
import os.path as osp

import mmengine

from opencompass.utils import dataset_abbr_from_cfg


def get_outdir(cfg, time_str):
    """Build the summary output directory for a run and locate its results.

    Args:
        cfg (ConfigDict): The running config; only ``cfg['work_dir']`` is read.
        time_str (str): Current time, used to name the summary sub-directory.

    Returns:
        tuple: ``(output_dir, results_folder)`` where ``output_dir`` is the
        per-run directory under ``<work_dir>/summary`` (passed to
        ``mmengine.mkdir_or_exist``) and ``results_folder`` is
        ``<work_dir>/results``.
    """
    root = cfg['work_dir']
    # The per-run directory sits next to where the summary text file would be.
    summary_file = osp.join(root, 'summary', f'summary_{time_str}.txt')
    summary_parent, _ = osp.split(summary_file)
    run_dir = osp.join(summary_parent, f'{time_str}')
    mmengine.mkdir_or_exist(run_dir)
    return run_dir, osp.join(root, 'results')


def get_judgeanswer_and_reference(dataset, subdir_path, post_process):
    """Extract judgements (scores) and references.

    Results are read from ``<subdir_path>/<abbr>.json`` when that file exists.
    Otherwise the sharded files ``<abbr>_0.json``, ``<abbr>_1.json``, ... are
    read in order until the first missing index, and their records are merged
    into one dict re-keyed with consecutive integers.

    Args:
        dataset (ConfigDict): Dataset config.
        subdir_path (str): Model path in results dir.
        post_process (function): The pre-defined extract function. It is
            called with each record's ``'prediction'`` value; a ``None``
            return marks the record as not extractable and it is skipped.

    Returns:
        tuple: ``(judged_answers, references)`` - two lists of equal length
        holding the extracted judgements and the matching ``'gold'`` values.
        Both are empty when no result file is found.
    """
    dataset_abbr = dataset_abbr_from_cfg(dataset)
    filename = osp.join(subdir_path, dataset_abbr + '.json')
    partial_filename = osp.join(subdir_path, dataset_abbr + '_0.json')

    # Path reported in the extraction warning below. It must name a file that
    # was actually read, so the shard loop uses its own cursor instead of
    # overwriting it (previously the warning named the first *missing* shard).
    loaded_from = filename
    if osp.exists(osp.realpath(filename)):
        result = mmengine.load(filename)
    elif osp.exists(osp.realpath(partial_filename)):
        loaded_from = partial_filename
        result = {}
        shard_path = partial_filename
        next_index = 1
        while osp.exists(osp.realpath(shard_path)):
            # Shard-local keys may collide across files, so re-key every
            # record with a running integer index.
            for record in mmengine.load(shard_path).values():
                result[len(result)] = record
            shard_path = osp.join(subdir_path,
                                  dataset_abbr + '_' + str(next_index) + '.json')
            next_index += 1
    else:
        result = {}

    if len(result) == 0:
        print('*' * 100)
        print('There are no results for ' + filename + ' or ' +
              partial_filename)
        print('*' * 100)

    judged_answers = []
    references = []
    for record in result.values():
        processed_judge = post_process(record['prediction'])
        if processed_judge is not None:
            judged_answers.append(processed_judge)
            references.append(record['gold'])

    # Warn when 5% or more of the judgements could not be parsed.
    if len(judged_answers) <= 0.95 * len(result):
        print('*' * 100)
        print(
            f'For your {loaded_from} judge. Among {len(result)} judgements, successfully extracted {len(judged_answers)} judgements, please check!'
        )
        print('*' * 100)
    return judged_answers, references
[Feature] Add other judgelm prompts for Alignbench (#731) * add judgellm prompts * add judgelm prompts * update import info * fix situation that no abbr in config * fix situation that no abbr in config * add summarizer for other judgellm * change config name * add maxlen * add maxlen * dict assert * dict assert * fix strings * fix strings 2023-12-27 17:54:53 +08:00			`# flake8: noqa: E501`
			`import os.path as osp`

			`import mmengine`

			`from opencompass.utils import dataset_abbr_from_cfg`


			`def get_outdir(cfg, time_str):`
			`"""Get out put path.`

			`Args:`
			`cfg (ConfigDict): The running config.`
			`time_str (str): Current time.`
			`"""`
			`work_dir = cfg['work_dir']`
			`output_path = osp.join(work_dir, 'summary', f'summary_{time_str}.txt')`
			`output_dir = osp.join(osp.split(output_path)[0], f'{time_str}')`
			`mmengine.mkdir_or_exist(output_dir)`
			`results_folder = osp.join(work_dir, 'results')`
			`return output_dir, results_folder`


			`def get_judgeanswer_and_reference(dataset, subdir_path, post_process):`
			`"""Extract judgements (scores) and references.`

			`Args:`
			`dataset (ConfigDict): Dataset config.`
			`subdir_path (str): Model path in results dir.`
			`post_process (function): The pre-defined extract function.`
			`"""`
			`dataset_abbr = dataset_abbr_from_cfg(dataset)`
			`filename = osp.join(subdir_path, dataset_abbr + '.json')`
			`partial_filename = osp.join(subdir_path, dataset_abbr + '_0.json')`
			`if osp.exists(osp.realpath(filename)):`
			`result = mmengine.load(filename)`
			`elif osp.exists(osp.realpath(partial_filename)):`
			`filename = partial_filename`
			`result = {}`
			`i = 1`
			`partial_dict_flag = 0`
			`while osp.exists(osp.realpath(filename)):`
			`res = mmengine.load(filename)`
			`for k, v in res.items():`
			`result[partial_dict_flag] = v`
			`partial_dict_flag += 1`
			`filename = osp.join(subdir_path,`
			`dataset_abbr + '_' + str(i) + '.json')`
			`i += 1`
			`else:`
			`result = {}`

			`if len(result) == 0:`
			`print('*' * 100)`
			`print('There are no results for ' + filename + ' or ' +`
			`partial_filename)`
			`print('*' * 100)`

			`judged_answers = []`
			`references = []`
			`for k, v in result.items():`
			`processed_judge = post_process(v['prediction'])`
			`if processed_judge is not None:`
			`judged_answers.append(processed_judge)`
			`references.append(v['gold'])`
[Sync] Sync with internal codes 2024.06.28 (#1279) 2024-06-28 14:16:34 +08:00			`# else:`
			`# print(v['prediction'])`
			`# print('-' * 128)`
[Refactor] Reorganize subjective eval (#1284) * fix pip version * fix pip version * reorganize subjective eval * reorg sub * reorg subeval * reorg subeval * update subjective doc * reorg subeval * reorg subeval 2024-07-05 22:11:37 +08:00			`if len(judged_answers) <= 0.95 * len(result):`
[Feature] Add other judgelm prompts for Alignbench (#731) * add judgellm prompts * add judgelm prompts * update import info * fix situation that no abbr in config * fix situation that no abbr in config * add summarizer for other judgellm * change config name * add maxlen * add maxlen * dict assert * dict assert * fix strings * fix strings 2023-12-27 17:54:53 +08:00			`print('' 100)`
			`print(`
[Refactor] Reorganize subjective eval (#1284) * fix pip version * fix pip version * reorganize subjective eval * reorg sub * reorg subeval * reorg subeval * update subjective doc * reorg subeval * reorg subeval 2024-07-05 22:11:37 +08:00			`f'For your {filename} judge. Among {len(result)} judgements, successfully extracted {len(judged_answers)} judgements, please check!'`
[Feature] Add other judgelm prompts for Alignbench (#731) * add judgellm prompts * add judgelm prompts * update import info * fix situation that no abbr in config * fix situation that no abbr in config * add summarizer for other judgellm * change config name * add maxlen * add maxlen * dict assert * dict assert * fix strings * fix strings 2023-12-27 17:54:53 +08:00			`)`
			`print('*' * 100)`
			`return judged_answers, references`