OpenCompass/opencompass/datasets/subjective/utils.py

34 lines
1.1 KiB
Python
Raw Normal View History

# flake8: noqa: E501
def get_judgeanswer_and_reference(result, filename, post_process):
    """Extract judgements (scores) and their matching references.

    Every value in ``result`` is passed to ``post_process``. Entries for
    which it returns ``None`` are treated as failed extractions and skipped,
    so the two returned lists always stay aligned index-by-index.

    Diagnostics are printed to stdout (nothing is raised) when ``result`` is
    empty, or when at most 95% of the entries could be extracted.

    Args:
        result (dict): Mapping of judgement records. Only the values are
            used; each value must support ``value['gold']``.
        filename (str): Name used to identify the judged model in the
            printed warnings.
        post_process (function): Called with one record; returns the
            extracted judgement, or ``None`` if extraction failed.

    Returns:
        tuple[list, list]: ``(judged_answers, references)`` where
        ``references[i]`` is the ``'gold'`` entry of the record that
        produced ``judged_answers[i]``.
    """
    banner = '*' * 100

    if not result:
        print(banner)
        print('There are no results for ' + filename)
        print(banner)
        # Return early: with zero records the extraction-rate check below
        # would also fire (0 <= 0.95 * 0) and print a second, misleading
        # warning about "0 of 0" judgements.
        return [], []

    judged_answers = []
    references = []
    for record in result.values():
        processed_judge = post_process(record)
        if processed_judge is None:
            # Extraction failed for this record; drop it from both lists.
            continue
        judged_answers.append(processed_judge)
        references.append(record['gold'])

    # Warn when the extraction success rate is 95% or lower.
    if len(judged_answers) <= 0.95 * len(result):
        print(banner)
        print(
            f'For your {filename} judge. Among {len(result)} judgements, successfully extracted {len(judged_answers)} judgements, please check!'
        )
        print(banner)
    return judged_answers, references