mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
65 lines
2.2 KiB
Python
65 lines
2.2 KiB
Python
from ..utils.function_utils import compute_f1_two_sets
|
|
from ..utils.rc_f1 import CJRCEvaluator
|
|
|
|
|
|
"""
|
|
task: event detection
|
|
metric: F1 score
|
|
事件检测
|
|
"""
|
|
# The 20 event-type labels that a prediction is scanned for in the
# event detection task (matched by substring in compute_sjjc).
option_list = [
    "支付/给付",
    "欺骗",
    "搜查/扣押",
    "要求/请求",
    "卖出",
    "买入",
    "获利",
    "拘捕",
    "鉴定",
    "同意/接受",
    "供述",
    "联络",
    "帮助/救助",
    "租用/借用",
    "受伤",
    "伪造",
    "卖淫",
    "伤害人身",
    "赔偿",
    "归还/偿还",
]
|
|
|
|
def compute_sjjc(data_dict):
    """Compute the average F1 score for the event detection (sjjc) task.

    The sjjc task covers 20 event types, and a question may involve one or
    more of them. For each example, the event types found in the prediction
    are compared with the reference event types, and the F1 score between
    the two sets is computed with ``compute_f1_two_sets``.

    Args:
        data_dict: A sized collection of examples. Each example is a mapping
            with a ``"prediction"`` string and a ``"refr"`` string holding
            the reference event types separated by ``";"``.

    Returns:
        A dict with two entries:
            ``"score"``: the mean per-example F1 score.
            ``"abstention_rate"``: the fraction of examples whose prediction
            mentions none of the labels in ``option_list``.
        Both values are ``0.0`` when ``data_dict`` is empty.
    """
    # Guard against dividing by zero when there is nothing to evaluate.
    if len(data_dict) == 0:
        return {"score": 0.0, "abstention_rate": 0.0}

    score_list, abstentions = [], 0

    for example in data_dict:
        prediction, answer = example["prediction"], example["refr"]

        gt_set = set(answer.split(";"))

        # A label counts as predicted when it appears anywhere in the
        # prediction text (substring match).
        pred_set = {option for option in option_list if option in prediction}

        if not pred_set:
            abstentions += 1

        score_list.append(compute_f1_two_sets(gt_set, pred_set))

    f1_score_average = sum(score_list) / len(score_list)
    return {
        "score": f1_score_average,
        "abstention_rate": abstentions / len(data_dict),
    }
|
|
|
|
"""
|
|
task: trigger word extraction
|
|
metric: F1 score
|
|
触发词抽取
|
|
"""
|
|
def compute_cfcy(data_dict):
    """Compute the average F1 score for the trigger word extraction (cfcy) task.

    For each example, the reference and the prediction are both split on
    ``";"`` and paired up by position. Each pair is scored with
    ``CJRCEvaluator.compute_f1``. The sum of the pair scores is divided by
    the number of predicted items (precision) and by the number of reference
    items (recall), and the two are combined into a per-example F1 score.

    Args:
        data_dict: A sized collection of examples. Each example is a mapping
            with a ``"prediction"`` string and a ``"refr"`` string, both
            using ``";"`` as the item separator.

    Returns:
        A dict ``{"score": value}`` where ``value`` is the mean per-example
        F1 score, or ``0.0`` when ``data_dict`` is empty.
    """
    # Guard against dividing by zero when there is nothing to evaluate.
    if len(data_dict) == 0:
        return {"score": 0.0}

    scores = 0

    for example in data_dict:
        prediction, answer = example["prediction"], example["refr"]

        answers = answer.split(";")
        predictions = prediction.split(";")

        # zip pairs items by position and stops at the shorter list, so
        # surplus items on either side contribute no score.
        intersected = [
            CJRCEvaluator.compute_f1(r, h)
            for r, h in zip(answers, predictions)
        ]
        matched = sum(intersected)

        # str.split with a separator always returns at least one element,
        # so neither denominator can be zero.
        prec = matched / len(predictions)
        rec = matched / len(answers)

        # The small epsilon avoids dividing by zero when prec and rec are 0.
        scores += 2 * prec * rec / (prec + rec + 1e-10)

    f1_score_average = scores / len(data_dict)
    return {"score": f1_score_average}
|