mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00

* fix pip version * fix pip version * update (#1522) Co-authored-by: zhulin1 <zhulin1@pjlab.org.cn> * [Feature] Update Models (#1518) * Update Models * Update * Update humanevalx * Update * Update * [Feature] Dataset prompts update for ARC, BoolQ, Race (#1527) add judgerbench and reorg sub add judgerbench and reorg subeval add judgerbench and reorg subeval * add judgerbench and reorg subeval * add judgerbench and reorg subeval * add judgerbench and reorg subeval * add judgerbench and reorg subeval --------- Co-authored-by: zhulinJulia24 <145004780+zhulinJulia24@users.noreply.github.com> Co-authored-by: zhulin1 <zhulin1@pjlab.org.cn> Co-authored-by: Songyang Zhang <tonysy@users.noreply.github.com> Co-authored-by: Linchen Xiao <xxllcc1993@gmail.com>
31 lines
1.1 KiB
Python
31 lines
1.1 KiB
Python
# flake8: noqa: E501


def get_judgeanswer_and_reference(result, filename, post_process):
    """Extract judgements (scores) and their references from judge results.

    Args:
        result (dict): Mapping whose values are the per-sample judge records.
            Each record is handed to ``post_process``; a record that yields a
            judgement must also provide a ``'gold'`` entry.
        filename (str): Name inserted into the warning messages to identify
            the results being processed.
        post_process (function): The pre-defined extract function. It is
            called with a single record and its return value is kept as the
            judgement unless it is ``None``.

    Returns:
        tuple: ``(judged_answers, references)`` -- two lists of equal length
        holding, in iteration order of ``result``, every non-``None``
        judgement and the ``'gold'`` value of the record it came from.
    """
    banner = '*' * 100

    if len(result) == 0:
        print(banner)
        print('There are no results for ' + filename)
        print(banner)
        # Nothing to extract. Returning here also keeps the extraction-rate
        # warning below from firing spuriously (0 <= 0.95 * 0 is true).
        return [], []

    judged_answers = []
    references = []
    for record in result.values():
        processed_judge = post_process(record)
        # Records the extractor could not parse are skipped, so both lists
        # stay aligned index-by-index.
        if processed_judge is not None:
            judged_answers.append(processed_judge)
            references.append(record['gold'])

    # Warn when 5% or more of the judgements could not be extracted.
    if len(judged_answers) <= 0.95 * len(result):
        print(banner)
        print(
            f'For your {filename} judge. Among {len(result)} judgements, '
            f'successfully extracted {len(judged_answers)} judgements, please check!'
        )
        print(banner)

    return judged_answers, references