from rouge_chinese import Rouge
import jieba
from nltk.translate.gleu_score import corpus_gleu


def compute_f1_two_sets(pred_set, gt_set):
    # set-level F1: precision and recall are computed from the overlap of the two sets
    precision = len(pred_set.intersection(gt_set)) / len(pred_set) if len(pred_set) > 0 else 0
    recall = len(pred_set.intersection(gt_set)) / len(gt_set) if len(gt_set) > 0 else 0
    f1 = 2 * precision * recall / (precision + recall) if precision + recall > 0 else 0
    return f1
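# Illustrative example (hypothetical inputs, not from the benchmark data):
#   compute_f1_two_sets({"A", "B", "C"}, {"B", "C", "D"})
#   -> precision = 2/3, recall = 2/3, F1 = 0.6666666666666666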


def multi_choice_judge(prediction, option_list, answer_token):
    # a dict; key: letters in the option list, value: 1 if the letter appears in the prediction, else 0
    count_dict, abstention, accuracy = {}, 0, 0
    for option in option_list:
        option_count = prediction.count(option)
        count_dict[option] = 1 if option_count > 0 else 0  # multiple occurrences of the same letter are counted as 1

    if sum(count_dict.values()) == 0:
        abstention = 1
    # if the answer token is the only predicted option, the prediction is correct
    elif count_dict[answer_token] == 1 and sum(count_dict.values()) == 1:
        accuracy = 1
    return {"score": accuracy, "abstention": abstention}


"""
|
|
|
|
compute the rouge score.
|
|
|
|
hyps and refs are lists of hyposisis and reference strings
|
|
|
|
empty predictions are replaces with 无内容
|
|
|
|
"""
def compute_rouge(hyps, refs):
    assert len(hyps) == len(refs)
    # segment Chinese text with jieba so ROUGE is computed over word tokens
    hyps = [' '.join(jieba.cut(h)) for h in hyps]
    hyps = [h if h.strip() != "" else "无内容" for h in hyps]
    refs = [' '.join(jieba.cut(r)) for r in refs]
    return Rouge().get_scores(hyps, refs)
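# Illustrative example (hypothetical strings; requires jieba and rouge_chinese to be installed):
#   scores = compute_rouge(["被告人犯盗窃罪"], ["被告人构成盗窃罪"])
#   scores[0]["rouge-l"]["f"]  # per-pair dict with rouge-1 / rouge-2 / rouge-l scores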


"""
|
|
|
|
compute the gleu score.
|
|
|
|
hyps and refs are lists of hyposisis and reference strings
|
|
|
|
empty predictions are replaces with 无内容
|
|
|
|
"""
def compute_gleu(hyps, refs):
    assert len(hyps) == len(refs)
    # segment Chinese text with jieba; corpus_gleu expects lists of tokens rather than plain strings
    hyps = [' '.join(jieba.cut(h)) for h in hyps]
    hyps = [h if h.strip() != "" else "无内容" for h in hyps]
    hyps = [h.split() for h in hyps]
    refs = [[' '.join(jieba.cut(r)).split()] for r in refs]
    return corpus_gleu(refs, hyps)
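# Illustrative example (hypothetical strings; requires jieba and nltk to be installed):
#   compute_gleu(["被告人犯盗窃罪"], ["被告人构成盗窃罪"])
#   -> a single corpus-level GLEU score between 0 and 1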