mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00

* add evaluation of scibench * add evaluation of scibench * update scibench * remove scibench evaluator --------- Co-authored-by: Leymore <zfz-960727@163.com>
51 lines
1.2 KiB
Python
51 lines
1.2 KiB
Python
import json
|
|
import os.path as osp
|
|
import re
|
|
|
|
from datasets import Dataset
|
|
|
|
from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS
|
|
|
|
from .base import BaseDataset
|
|
|
|
|
|
@LOAD_DATASET.register_module()
class ScibenchDataset(BaseDataset):
    """Loader that turns a SciBench JSON problem file into a dataset."""

    @staticmethod
    def load(path: str, name: str):
        """Read ``<path>/<name>.json`` and build a question/answer dataset.

        Args:
            path: Directory that contains the JSON file.
            name: File name without the ``.json`` suffix.

        Returns:
            The object produced by ``Dataset.from_list`` with one row per
            JSON entry: ``question`` is the stripped ``problem_text`` and
            ``answer`` is the stripped ``answer_number``.

        Raises:
            KeyError: If an entry lacks ``problem_text`` or
                ``answer_number``.
        """
        filename = osp.join(path, f'{name}.json')
        # Decode explicitly as UTF-8 instead of relying on the locale's
        # default encoding, which differs between platforms.
        with open(filename, 'r', encoding='utf-8') as infile:
            raw_data = json.load(infile)

        train_data = [{
            'question': entry['problem_text'].strip(),
            'answer': entry['answer_number'].strip(),
        } for entry in raw_data]

        return Dataset.from_list(train_data)
|
|
|
|
|
|
@TEXT_POSTPROCESSORS.register_module('scibench')
|
|
def scibench_postprocess(text: str) -> str:
    """Extract the final answer string from a model response.

    Steps, in order:

    1. If the text contains ``'answer is'``, keep only the segment between
       the first and second occurrence (stripped).
    2. If that text contains ``\\boxed{...}``, return the boxed content.
       Nested braces are kept intact when they balance on the same line.
    3. Otherwise drop thousands separators between digits and return the
       last number found.
    4. If there is no number either, return the remaining text unchanged.

    Args:
        text: Raw model output.

    Returns:
        The extracted answer as a string.
    """
    ans = text
    segments = ans.split('answer is')
    if len(segments) != 1:
        ans = segments[1].strip()

    match = re.search(r'\\boxed\{(.+?)\}', ans)
    if match:
        return _scibench_balanced_boxed(ans, match)

    # "1,234" -> "1234" so the number is not split at the comma.
    output = re.sub(r'(\d),(\d)', r'\1\2', ans)
    numbers = re.findall(r'-?\d*\.?\d+|\d+', output)
    if numbers:
        return numbers[-1]

    return ans


def _scibench_balanced_boxed(ans, match):
    """Return the boxed content, extended to the brace that balances it.

    The non-greedy regex capture stops at the first ``}``, which truncates
    content such as ``\\frac{1}{2}``. Scan from the start of the capture and
    stop at the closing brace that matches ``\\boxed{``. If the braces do not
    balance before a newline or the end of the text, or the balanced content
    would be empty, fall back to the plain regex capture.
    """
    start = match.start(1)
    depth = 1
    for idx in range(start, len(ans)):
        char = ans[idx]
        if char == '\n':
            # The regex capture never crosses a line; neither does the scan.
            break
        if char == '{':
            depth += 1
        elif char == '}':
            depth -= 1
            if depth == 0:
                if idx > start:
                    return ans[start:idx]
                break
    return match.group(1)
|