OpenCompass/opencompass/datasets/lawbench/evaluation_functions/ydlj.py
Leymore 861942ab1b
[Feature] Add lawbench (#460)
* add lawbench

* update requirements

* update
2023-10-13 06:51:36 -05:00

18 lines
548 B
Python

from ..utils.comprehension_scores import compute_rc_f1
"""
Task: machine reading comprehension
Metric: F1 score
Legal reading comprehension (法律阅读理解)
"""
def compute_ydlj(data_dict):
    """Score reading-comprehension predictions against their references.

    Args:
        data_dict: Iterable of example mappings. Each example must provide
            ``"prediction"`` (the model output) and ``"refr"`` (the
            reference answer, a string).

    Returns:
        The result of ``compute_rc_f1(predictions, references)``, passed
        through unchanged.

    Raises:
        KeyError: If an example lacks ``"prediction"`` or ``"refr"``.
    """
    predictions, references = [], []
    # Single pass, so ``data_dict`` may be any iterable (including one-shot
    # iterators). The prompt text is not needed for scoring, so it is not read.
    for example in data_dict:
        predictions.append(example["prediction"])
        # Remove every occurrence of the "回答:" ("Answer:") marker from the
        # reference before it is compared with the prediction.
        references.append(example["refr"].replace("回答:", ""))
    return compute_rc_f1(predictions, references)