From b6d1bc60dc74d411375d0b8eebf4196935b22b6f Mon Sep 17 00:00:00 2001 From: xuxuxuxuxuxjh <1073311322@qq.com> Date: Wed, 7 May 2025 23:05:36 +0800 Subject: [PATCH] Update datasets_info & hf_path --- dataset-index.yml | 18 ++++++++++++++++++ .../ClinicBench_llmjudge_gen_d09668.py | 1 - .../PubMedQA/PubMedQA_llmjudge_gen_f00302.py | 1 - .../ScienceQA/ScienceQA_llmjudge_gen_f00302.py | 1 - opencompass/datasets/ClinicBench.py | 15 ++++++--------- opencompass/datasets/PubMedQA.py | 15 ++++++--------- opencompass/datasets/ScienceQA.py | 15 ++++++--------- opencompass/utils/datasets_info.py | 15 +++++++++++++++ 8 files changed, 51 insertions(+), 30 deletions(-) diff --git a/dataset-index.yml b/dataset-index.yml index 9585f97c..41fbc378 100644 --- a/dataset-index.yml +++ b/dataset-index.yml @@ -128,6 +128,24 @@ paper: https://arxiv.org/abs/2501.18362 configpath: opencompass/configs/datasets/MedXpertQA/MedXpertQA_gen.py configpath_llmjudge: opencompass/configs/datasets/MedXpertQA/MedXpertQA_llmjudge_gen.py +- ClinicBench: + name: ClinicBench + category: Knowledge / Medicine + paper: https://arxiv.org/abs/2405.00716 + configpath: '' + configpath_llmjudge: opencompass/configs/datasets/ClinicBench/ClinicBench_llmjudge_gen.py +- ScienceQA: + name: ScienceQA + category: Knowledge / Medicine + paper: https://arxiv.org/abs/2209.09513 + configpath: '' + configpath_llmjudge: opencompass/configs/datasets/ScienceQA/ScienceQA_llmjudge_gen.py +- PubMedQA: + name: PubMedQA + category: Knowledge / Medicine + paper: https://arxiv.org/abs/1909.06146 + configpath: '' + configpath_llmjudge: opencompass/configs/datasets/PubMedQA/PubMedQA_llmjudge_gen.py - musr: name: MuSR category: Reasoning diff --git a/opencompass/configs/datasets/ClinicBench/ClinicBench_llmjudge_gen_d09668.py b/opencompass/configs/datasets/ClinicBench/ClinicBench_llmjudge_gen_d09668.py index c6c2e4f3..bf8fdd28 100644 --- a/opencompass/configs/datasets/ClinicBench/ClinicBench_llmjudge_gen_d09668.py +++ b/opencompass/configs/datasets/ClinicBench/ClinicBench_llmjudge_gen_d09668.py @@ -45,7 +45,6 @@ ClinicBench_datasets = [] ClinicBench_reader_cfg = dict( input_columns=['question', 'choices'], output_column='label', - test_split='validation', ) ClinicBench_infer_cfg = dict( diff --git a/opencompass/configs/datasets/PubMedQA/PubMedQA_llmjudge_gen_f00302.py b/opencompass/configs/datasets/PubMedQA/PubMedQA_llmjudge_gen_f00302.py index 2315dc00..86526aca 100644 --- a/opencompass/configs/datasets/PubMedQA/PubMedQA_llmjudge_gen_f00302.py +++ b/opencompass/configs/datasets/PubMedQA/PubMedQA_llmjudge_gen_f00302.py @@ -39,7 +39,6 @@ PubMedQA_datasets = [] PubMedQA_reader_cfg = dict( input_columns=['question', 'choices'], output_column='label', - test_split='validation', ) PubMedQA_infer_cfg = dict( diff --git a/opencompass/configs/datasets/ScienceQA/ScienceQA_llmjudge_gen_f00302.py b/opencompass/configs/datasets/ScienceQA/ScienceQA_llmjudge_gen_f00302.py index 2b102662..f363f5af 100644 --- a/opencompass/configs/datasets/ScienceQA/ScienceQA_llmjudge_gen_f00302.py +++ b/opencompass/configs/datasets/ScienceQA/ScienceQA_llmjudge_gen_f00302.py @@ -39,7 +39,6 @@ ScienceQA_datasets = [] ScienceQA_reader_cfg = dict( input_columns=['question', 'choices'], output_column='label', - test_split='validation', ) ScienceQA_infer_cfg = dict( diff --git a/opencompass/datasets/ClinicBench.py b/opencompass/datasets/ClinicBench.py index 1d28bef3..dbcc7090 100644 --- a/opencompass/datasets/ClinicBench.py +++ b/opencompass/datasets/ClinicBench.py @@ -1,6 +1,7 @@ -from datasets import Dataset, DatasetDict, load_dataset +from datasets import load_dataset from opencompass.registry import LOAD_DATASET +from opencompass.utils import get_data_path from .base import BaseDataset @@ -9,16 +10,12 @@ from .base import BaseDataset class ClinicBenchDataset(BaseDataset): @staticmethod - def load_single(): - dataset = load_dataset('xuxuxuxuxu/Pharmacology-QA')['train'] + def load_single(path): + dataset = load_dataset(path)['train'] return dataset @staticmethod def load(path): - train_dataset = Dataset.from_list([]) - val_dataset = ClinicBenchDataset.load_single() - dataset = DatasetDict({ - 'train': train_dataset, - 'validation': val_dataset - }) + path = get_data_path(path) + dataset = ClinicBenchDataset.load_single(path) return dataset diff --git a/opencompass/datasets/PubMedQA.py b/opencompass/datasets/PubMedQA.py index a7bb3fdb..828e4bbc 100644 --- a/opencompass/datasets/PubMedQA.py +++ b/opencompass/datasets/PubMedQA.py @@ -1,6 +1,7 @@ -from datasets import Dataset, DatasetDict, load_dataset +from datasets import Dataset, load_dataset from opencompass.registry import LOAD_DATASET +from opencompass.utils import get_data_path from .base import BaseDataset @@ -9,9 +10,9 @@ from .base import BaseDataset class PubMedQADataset(BaseDataset): @staticmethod - def load_single(): + def load_single(path): dataset = [] - ds = load_dataset('qiaojin/PubMedQA', 'pqa_labeled') + ds = load_dataset(path, 'pqa_labeled') for data in ds['train']: data['question'] = (f"CONTEXTS: {data['context']}\n" f"QUESTION: {data['question']}") @@ -30,10 +31,6 @@ class PubMedQADataset(BaseDataset): @staticmethod def load(path): - train_dataset = Dataset.from_list([]) - val_dataset = PubMedQADataset.load_single() - dataset = DatasetDict({ - 'train': train_dataset, - 'validation': val_dataset - }) + path = get_data_path(path) + dataset = PubMedQADataset.load_single(path) return dataset diff --git a/opencompass/datasets/ScienceQA.py b/opencompass/datasets/ScienceQA.py index db592d3f..adb8ea0c 100644 --- a/opencompass/datasets/ScienceQA.py +++ b/opencompass/datasets/ScienceQA.py @@ -1,6 +1,7 @@ -from datasets import Dataset, DatasetDict, load_dataset +from datasets import Dataset, load_dataset from opencompass.registry import LOAD_DATASET +from opencompass.utils import get_data_path from .base import BaseDataset @@ -9,9 +10,9 @@ from .base import BaseDataset class ScienceQADataset(BaseDataset): @staticmethod - def load_single(): + def load_single(path): dataset = [] - ds = load_dataset('derek-thomas/ScienceQA') + ds = load_dataset(path) for data in ds['test']: if data['image'] is None: data['label'] = chr(65 + data['answer'] @@ -28,10 +29,6 @@ class ScienceQADataset(BaseDataset): @staticmethod def load(path): - train_dataset = Dataset.from_list([]) - val_dataset = ScienceQADataset.load_single() - dataset = DatasetDict({ - 'train': train_dataset, - 'validation': val_dataset - }) + path = get_data_path(path) + dataset = ScienceQADataset.load_single(path) return dataset diff --git a/opencompass/utils/datasets_info.py b/opencompass/utils/datasets_info.py index 5048a496..6d31da97 100644 --- a/opencompass/utils/datasets_info.py +++ b/opencompass/utils/datasets_info.py @@ -446,6 +446,21 @@ DATASETS_MAPPING = { "hf_id": "", "local": "./data/ChemBench4K", }, + "opencompass/ClinicBench": { + "ms_id": "", + "hf_id": "xuxuxuxuxu/Pharmacology-QA", + "local": "", + }, + "opencompass/ScienceQA": { + "ms_id": "", + "hf_id": "derek-thomas/ScienceQA", + "local": "", + }, + "opencompass/PubMedQA": { + "ms_id": "", + "hf_id": "qiaojin/PubMedQA", + "local": "", + }, }