# Mirror of https://github.com/open-compass/opencompass.git
# Synced 2025-05-30 16:03:24 +08:00 (94 lines, 5.0 KiB, Python)
# flake8: noqa: E501
|
|||
|
from .common_answers import (common_false_list, common_start_false_dict,
|
|||
|
common_start_true_dict, common_true_list)
|
|||
|
|
|||
|
|
|||
|
def get_gt_label(item):
    """Return the ground-truth label stored under ``item['gt_answer']``.

    Args:
        item: Mapping for one sample; it must contain the key
            ``'gt_answer'``.

    Returns:
        The value stored under ``'gt_answer'``, unchanged.

    Raises:
        KeyError: If ``item`` has no ``'gt_answer'`` key.
    """
    return item['gt_answer']
|
|||
|
|
|||
|
|
|||
|
def _suffix_variants(start_dict):
    """Return every suffix of each phrase that is at least ``key`` chars long.

    ``start_dict`` maps an integer to a list of phrases; for each phrase all
    suffixes with a length from that integer up to the full phrase length are
    produced, in the same order as the nested loops would yield them.
    """
    return [
        phrase[-size:]
        for min_len, phrases in start_dict.items()
        for phrase in phrases
        for size in range(min_len, len(phrase) + 1)
    ]


def get_pred_label(model_response, item, prompt_style, type):
    """Map a free-text model response to a binary yes/no label.

    The response is stripped and lower-cased, then checked in this order:

    1. If it starts with a known "true" opening (or a suffix of one), return 1.
    2. If it starts with a known "false" opening (or a suffix of one),
       return 0.
    3. If it contains any "true" phrase, return 1, unless some "false" phrase
       occurs strictly before the earliest "true" phrase, in which case
       return 0.
    4. If it contains any "false" phrase, return 0.
    5. Otherwise return -1.

    Args:
        model_response: Raw text produced by the model.
        item: Unused; kept for interface compatibility.
        prompt_style: Unused; kept for interface compatibility.
        type: Unused; kept for interface compatibility.

    Returns:
        1 for a "yes"-like answer, 0 for a "no"-like answer, and -1 when no
        known phrase is found.
    """
    model_response = model_response.strip().lower()

    # Some models echo the question in their response. The question is
    # usually stripped beforehand, but depending on the response format a
    # tail of it may remain, so suffixes of the known opening phrases are
    # accepted as valid starts too.
    start_option1_list = _suffix_variants(common_start_true_dict)
    start_option2_list = _suffix_variants(common_start_false_dict)

    inner_option1_list = [
        'can be identified', '可以被识别', '能被识别', 'answer (yes or no?): yes',
        'answer is yes', "\"yes\"", 'answer: yes', 'answer is: yes',
        'answer is:\n\nyes', 'answer is:\nyes', 'is identified.',
        'can be identified', '可以被识别', '能被识别', '答案是:是', '答案是:\n\n是', '答案是:\n是',
        '答案:是', '答案是是', "\"是\"", '是的', '答案为“是”', '答案是“是”', '可以识别', '答案:是',
        '答案:可以', '答案:“是”', 'thus answering yes', 'henceforth; answering yes',
        'by answering yes', 'answeristheyes', 'answer would be yes',
        'answer (yes)', 'hence answering yes', 'hence my answer yes',
        'answer would definitely become yes', 'answer remains yes',
        "my answer was 'yes'", 'thus concludes our answer yes',
        'must answer yes', "answer should be 'yes'", "answer remains 'yes'",
        'henceforth answering yes', 'answer should be marked yes',
        'answer comes out yes', "should answer 'yes",
        'our answer should be yes', 'you should answer yes',
        'concluding answer - yes', 'answer should indeed say yes',
        'answer : yes', 'answer should also be yes', 'hence answering yes',
        'the answer is trivially yes', 'answer: yes', 'the answer is (yes)',
        '答案应为“是”'
    ] + common_true_list
    inner_option2_list = [
        'not identified', '不能被识别', '无法被识别', 'answer (yes or no?): no',
        'answer is no', "\"no\"", 'answer: no', 'answer is: no',
        'answer is:\n\nno', 'answer is:\nno', 'not identified', '不能被识别',
        '无法被识别', '答案是:否', '答案是:\n\n否', '答案是:\n否', '答案:否', '答案是否', "\"否\"",
        '回答是:否', '答案为“否”', '答案是“否”', '因果效应不可被识别', '答案:否', '答案:无法识别',
        '不存在可识别的因果效应', "doesn't have a causal relationship",
        'the correct answer should be no', 'answer would be no',
        'hence answering no', "answering your query 'no'",
        'therefore answering no', 'answer would be “no”', 'thus answering no',
        'this answers no', 'thus, answering no', 'answer should also be no',
        'answer would also turn out to be no', 'answer would have to be no',
        'answer would be – no', 'thus answering “no”', 'answer = no',
        'answer should be no', 'answer would definitely be no',
        'answer would need to be no', 'answer would need to be marked no',
        'hence why i answered “no', "hence answering 'no'",
        'answer must necessarily remain no', 'answer should marked no',
        'answer would most likely be no', 'answer would also be no',
        'answer for now might have to be `no`', 'henceforth - answer no',
        'answer could only be no', 'answer would also be no',
        'henceforth answering “no', 'answer would be no', 'hence answering no',
        'cannot be identified', 'answer (yes or no ?): no', '答案为“不”',
        'henceforth answering no', '答案为:否', '答案应该是“否', '因果效应不可被'
    ] + common_false_list

    if model_response.startswith(tuple(start_option1_list)):
        return 1
    if model_response.startswith(tuple(start_option2_list)):
        return 0

    # Collect the position of EVERY positive phrase present. Stopping at the
    # first phrase found in list order would not give the earliest position
    # in the text, which is what the comparison below needs.
    true_positions = [
        pos for option in inner_option1_list
        if (pos := model_response.find(option)) > -1
    ]
    if true_positions:
        first_true = min(true_positions)
        # A negative phrase appearing strictly before the earliest positive
        # phrase takes precedence.
        if any(-1 < model_response.find(option) < first_true
               for option in inner_option2_list):
            return 0
        return 1

    if any(option in model_response for option in inner_option2_list):
        return 0
    return -1
|