# OpenCompass/opencompass/datasets/math401.py

from opencompass.openicl import BaseEvaluator


def check(a, b):
    """Return whether ``a`` and ``b`` are numerically equal within 1e-3.

    Both arguments are converted with ``float()``; any exception raised by
    that conversion (e.g. for non-numeric text) propagates to the caller.
    """
    difference = float(a) - float(b)
    # Strict bounds on both sides: a gap of exactly 1e-3 is not a match.
    return -1e-3 < difference < 1e-3


class Math401Evaluator(BaseEvaluator):
    """Accuracy evaluator that compares answers numerically.

    A prediction counts as correct when the module-level ``check`` helper
    returns a truthy value for the (prediction, reference) pair.
    """

    def score(self, predictions, references):
        """Score predictions against references.

        Args:
            predictions: Sequence of predicted answers.
            references: Sequence of reference answers, aligned
                index-by-index with ``predictions``.

        Returns:
            ``{'error': ...}`` when the two inputs differ in length.
            Otherwise a dict with ``'accuracy'`` (percentage of correct
            pairs; ``0.0`` when there are no pairs) and ``'details'`` (one
            ``{'pred', 'answer', 'correct'}`` dict per pair, in input
            order).
        """
        if len(predictions) != len(references):
            return {
                'error': 'predictions and references have different '
                'length'
            }

        details = []
        correct = 0
        for pred, answer in zip(predictions, references):
            try:
                is_correct = bool(check(pred, answer))
            except (TypeError, ValueError, OverflowError):
                # The value could not be converted to a number; score the
                # pair as wrong rather than aborting the whole evaluation.
                is_correct = False
            correct += is_correct
            details.append({
                'pred': pred,
                'answer': answer,
                'correct': is_correct,
            })

        count = len(details)
        # Guard the empty case: dividing by zero would raise here.
        accuracy = 100 * correct / count if count else 0.0
        return {'accuracy': accuracy, 'details': details}