mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
37 lines
1.6 KiB
Python
37 lines
1.6 KiB
Python
![]() |
from ..utils.function_utils import multi_choice_judge
|
|||
|
|
|||
|
"""
|
|||
|
multi-choice single-label selection
|
|||
|
metric: accuracy
|
|||
|
争议焦点:识别案件涉及的争议焦点
|
|||
|
"""
|
|||
|
|
|||
|
def compute_jdzy(data_dict):
|
|||
|
"""
|
|||
|
Compute the Accuracy
|
|||
|
The JEC dataset has 16 possible answers for each question, stored in the option_list
|
|||
|
A prediction is correct if
|
|||
|
1. The correct answer appears in the prediction, and
|
|||
|
2. Options other than the answer do not appear in the prediction.
|
|||
|
"""
|
|||
|
|
|||
|
score_list, abstentions = [], 0
|
|||
|
option_list = ["诉讼主体", "租金情况", "利息", "本金争议", "责任认定", "责任划分", "损失认定及处理",
|
|||
|
"原审判决是否适当", "合同效力", "财产分割", "责任承担", "鉴定结论采信问题", "诉讼时效", "违约", "合同解除", "肇事逃逸"]
|
|||
|
for example in data_dict:
|
|||
|
question, prediction, answer = example["origin_prompt"], example["prediction"], example["refr"]
|
|||
|
if answer[7:-1] == "赔偿":
|
|||
|
# todo: dataset imperfection
|
|||
|
continue
|
|||
|
assert answer.startswith("争议焦点类别:") and answer[7:-1] in option_list, \
|
|||
|
f"answer: {answer} \n question: {question}"
|
|||
|
|
|||
|
answer_letter = answer[7:-1]
|
|||
|
judge = multi_choice_judge(prediction, option_list, answer_letter)
|
|||
|
score_list.append(judge["score"])
|
|||
|
abstentions += judge["abstention"]
|
|||
|
|
|||
|
# compute the accuracy of score_list
|
|||
|
accuracy = sum(score_list) / len(score_list)
|
|||
|
return {"score": accuracy, "abstention_rate": abstentions / len(data_dict)}
|