diff --git a/opencompass/configs/datasets/matbench/matbench_gen.py b/opencompass/configs/datasets/matbench/matbench_gen.py index c3b4af4f..8b8a676b 100644 --- a/opencompass/configs/datasets/matbench/matbench_gen.py +++ b/opencompass/configs/datasets/matbench/matbench_gen.py @@ -47,6 +47,7 @@ for task in matbench_tasks: dict( type=MatbenchDataset, path=f'opencompass/Matbench', + task=task, abbr=task, reader_cfg=matbench_reader_cfg, infer_cfg=matbench_infer_cfg, diff --git a/opencompass/datasets/matbench/matbench.py b/opencompass/datasets/matbench/matbench.py index 56b24bd4..d0faa6f2 100644 --- a/opencompass/datasets/matbench/matbench.py +++ b/opencompass/datasets/matbench/matbench.py @@ -1,4 +1,5 @@ import json +import os from datasets import Dataset from sklearn.metrics import (accuracy_score, f1_score, precision_score, @@ -8,6 +9,7 @@ from opencompass.datasets.matbench.post_process import (parse_float_answer, parse_true_false_answer ) from opencompass.openicl.icl_evaluator import BaseEvaluator +from opencompass.utils import get_data_path from opencompass.registry import ICL_EVALUATORS, LOAD_DATASET from ..base import BaseDataset @@ -17,7 +19,9 @@ from ..base import BaseDataset class MatbenchDataset(BaseDataset): @staticmethod - def load(path): + def load(path, task): + path = get_data_path(path) + path = os.path.join(path, 'matbench_base_fold_0_' + task + '_test.json') dataset = [] with open(path, 'r', encoding='utf-8') as file: data = json.load(file) diff --git a/opencompass/utils/datasets_info.py b/opencompass/utils/datasets_info.py index 9166f7bf..0383f8b8 100644 --- a/opencompass/utils/datasets_info.py +++ b/opencompass/utils/datasets_info.py @@ -634,6 +634,11 @@ DATASETS_URL = { "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/SQuAD2.0.zip", "md5": "1321cbf9349e1102a57d31d1b2bfdd7e", }, + "/Matbench":{ + "url": + "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/Matbench.zip", + "md5": "99f9457f54f4f419da9556af56ac4c24", + }, "mmlu_pro": { "url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/mmlu_pro.zip",