mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00

* add calm dataset * modify config max_out_len * update README * Modify README * update README * update README * update README * update README * update README * add summarizer and modify readme * delete summarizer config comment * update summarizer * modify same response to all questions * update README
38 lines
2.5 KiB
Python
38 lines
2.5 KiB
Python
from .common_answers import common_true_list, common_false_list, common_option_1_list, common_option_2_list, common_option_3_list, common_option_4_list, common_start_true_dict, common_start_false_dict, common_start_op1_dict, common_start_op2_dict, common_start_op3_dict, common_start_op4_dict
|
|
|
|
def get_gt_label(item):
|
|
return item["gt_answer"]
|
|
|
|
def get_pred_label(model_response, item, prompt_style, type):
|
|
model_response = model_response.strip().lower()
|
|
low_index = len(model_response)
|
|
start_str1_dict = common_start_true_dict
|
|
start_str2_dict = common_start_false_dict
|
|
|
|
start_option1_list,start_option2_list = [],[]
|
|
# some of the model will give response containing the question, we usually preprocess the response to remove the question part, but sometimes due to the model's response format, some of the question part is not removed, so here we are checking the response with the question part as well.
|
|
for key1, key2 in zip(start_str1_dict.keys(), start_str2_dict.keys()):
|
|
for str1,str2 in zip(start_str1_dict[key1], start_str2_dict[key2]):
|
|
for i in range(key1, len(str1)+1):
|
|
start_option1_list.append(str1[-i:])
|
|
for i in range(key2, len(str2)+1):
|
|
start_option2_list.append(str2[-i:])
|
|
|
|
inner_option1_list = ["serves as the parent node of","serves as a parent node of"]+common_true_list
|
|
inner_option2_list = common_false_list
|
|
if model_response.startswith(tuple(start_option1_list)):
|
|
label = 1
|
|
elif model_response.startswith(tuple(start_option2_list)):
|
|
label = 0
|
|
elif any(model_response.find(option)>-1 and (low_index:=min(low_index, model_response.find(option)))>-1 for option in inner_option1_list) \
|
|
or "yes" in model_response and ("is the ancestor of" in model_response or "is an ancestor of" in model_response or "serves as the ancestor node of" in model_response or "serves as an ancestor node of" in model_response) \
|
|
or "是" in model_response and "祖先节点" in model_response:
|
|
label = 1
|
|
if any(option in model_response and model_response.find(option) < low_index for option in inner_option2_list):
|
|
label = 0
|
|
elif any(response in model_response for response in inner_option2_list)\
|
|
or "不是" in model_response and "祖先节点" in model_response:
|
|
label = 0
|
|
else:
|
|
return -1
|
|
return label |