OpenCompass/opencompass/datasets/math401.py
2024-01-08 14:07:24 +00:00

31 lines
878 B
Python

from opencompass.openicl import BaseEvaluator
def check(a, b):
    """Return True when *a* and *b* are numerically equal within 1e-3.

    Both arguments are converted with ``float()``, so numeric strings are
    accepted. Any error raised by that conversion (for example on
    non-numeric text) propagates to the caller.
    """
    tolerance = 1e-3
    difference = float(a) - float(b)
    return abs(difference) < tolerance
class Math401Evaluator(BaseEvaluator):
    """Evaluator that scores predictions against references numerically.

    Each prediction is compared with its reference through the module-level
    ``check`` helper; the share of matching pairs is reported as a
    percentage.
    """

    def score(self, predictions, references):
        """Compute accuracy of *predictions* against *references*.

        Args:
            predictions: Sequence of predicted values, one per sample.
            references: Sequence of expected values, aligned with
                *predictions*.

        Returns:
            dict: ``{'error': <message>}`` when the two sequences differ in
            length. Otherwise ``{'accuracy': <percentage 0-100>,
            'details': [...]}``, where every detail entry holds the keys
            ``'pred'``, ``'answer'`` and ``'correct'``. Empty input yields
            an accuracy of ``0.0`` and an empty details list.
        """
        if len(predictions) != len(references):
            return {
                'error': 'predictions and references have different '
                'length'
            }

        correct = 0
        details = []
        for pred, answer in zip(predictions, references):
            try:
                is_correct = bool(check(pred, answer))
            except Exception:
                # Best-effort scoring: a pair that cannot be compared
                # (e.g. the prediction is not parseable as a number) is
                # counted as wrong instead of aborting the evaluation.
                is_correct = False
            if is_correct:
                correct += 1
            details.append({
                'pred': pred,
                'answer': answer,
                'correct': is_correct,
            })

        count = len(details)
        # Guard against ZeroDivisionError when there is nothing to score.
        accuracy = 100 * correct / count if count else 0.0
        return {'accuracy': accuracy, 'details': details}