diff --git a/dataset-index.yml b/dataset-index.yml index 9585f97c..1245c9c9 100644 --- a/dataset-index.yml +++ b/dataset-index.yml @@ -1023,3 +1023,15 @@ paper: https://arxiv.org/pdf/2402.09391 configpath: opencompass/configs/datasets/SmolInstruct/smolinstruct_gen.py configpath_llmjudge: '' +- medbullets: + name: Medbullets + category: Science /Medicine + paper: https://arxiv.org/pdf/2402.18060 + configpath: opencompass/configs/datasets/Medbullets/medbullets_gen.py + configpath_llmjudge: opencompass/configs/datasets/Medbullets/medbullets_llmjudge_gen.py +- medmcqa: + name: medmcqa + category: Science /Medicine + paper: https://arxiv.org/pdf/2203.14371 + configpath: opencompass/configs/datasets/medmcqa/medmcqa_gen.py + configpath_llmjudge: opencompass/configs/datasets/medmcqa/medmcqa_llmjudge_gen.py diff --git a/opencompass/configs/datasets/Medbullets/medbullets_gen_60c8f5.py b/opencompass/configs/datasets/Medbullets/medbullets_gen_60c8f5.py index 86a8f741..c506934a 100644 --- a/opencompass/configs/datasets/Medbullets/medbullets_gen_60c8f5.py +++ b/opencompass/configs/datasets/Medbullets/medbullets_gen_60c8f5.py @@ -8,14 +8,6 @@ import os SYSTEM_PROMPT = 'You are a helpful medical assistant.\n\n' # Where to put this? ZERO_SHOT_PROMPT = 'Q: {question}\n Please select the correct answer from the options above and output only the corresponding letter (A, B, C, D, or E) without any explanation or additional text.\n' -# 将相对于当前文件的相对路径转换为绝对路径 -def to_abs_path(relative_path: str) -> str: - # 当前脚本所在目录 - base_dir = os.path.dirname(os.path.abspath(__file__)) - # 拼接并规范化绝对路径 - abs_path = os.path.abspath(os.path.join(base_dir, relative_path)) - return abs_path - # Reader configuration reader_cfg = dict( input_columns=[ @@ -56,7 +48,7 @@ eval_cfg = dict( medbullets_dataset = dict( type=MedbulletsDataset, abbr='medbullets', - path=to_abs_path('data/medbullets.csv'), + path='opencompass/medbullets', prompt_mode='zero-shot', reader_cfg=reader_cfg, infer_cfg=infer_cfg, diff --git a/opencompass/configs/datasets/Medbullets/medbullets_llmjudge_gen_60c8f5.py b/opencompass/configs/datasets/Medbullets/medbullets_llmjudge_gen_60c8f5.py index 74f10d2f..3081ab74 100644 --- a/opencompass/configs/datasets/Medbullets/medbullets_llmjudge_gen_60c8f5.py +++ b/opencompass/configs/datasets/Medbullets/medbullets_llmjudge_gen_60c8f5.py @@ -29,15 +29,6 @@ GRADER_TEMPLATE = """ Judging the correctness of candidates' answers: """.strip() - -# 将相对于当前文件的相对路径转换为绝对路径 -def to_abs_path(relative_path: str) -> str: - # 当前脚本所在目录 - base_dir = os.path.dirname(os.path.abspath(__file__)) - # 拼接并规范化绝对路径 - abs_path = os.path.abspath(os.path.join(base_dir, relative_path)) - return abs_path - # Reader configuration reader_cfg = dict( input_columns=[ @@ -91,7 +82,7 @@ eval_cfg = dict( ), dataset_cfg=dict( type=MedbulletsDataset, - path=to_abs_path('data/medbullets.csv'), + path='opencompass/medbullets', prompt_mode='zero-shot', reader_cfg=reader_cfg, ), @@ -104,7 +95,7 @@ eval_cfg = dict( medbullets_dataset = dict( type=MedbulletsDataset, abbr='medbullets', - path=to_abs_path('data/medbullets.csv'), + path='opencompass/medbullets', prompt_mode='zero-shot', reader_cfg=reader_cfg, infer_cfg=infer_cfg, diff --git a/opencompass/datasets/Medbullets.py b/opencompass/datasets/Medbullets.py index 73a7e2ea..344a704f 100644 --- a/opencompass/datasets/Medbullets.py +++ b/opencompass/datasets/Medbullets.py @@ -5,7 +5,7 @@ from datasets import Dataset from opencompass.openicl import BaseEvaluator from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS -from opencompass.utils import get_logger +from opencompass.utils import get_data_path, get_logger from .base import BaseDataset @@ -39,6 +39,7 @@ class MedbulletsDataset(BaseDataset): @staticmethod def load(path: str, prompt_mode: str = 'zero-shot', **kwargs): # 读取 CSV 文件为 DataFrame,并将 NaN 转为空字符串 + path = get_data_path(path) df = pd.read_csv(path, encoding='utf-8') df = df.fillna('') diff --git a/opencompass/utils/datasets_info.py b/opencompass/utils/datasets_info.py index 5048a496..b80c756a 100644 --- a/opencompass/utils/datasets_info.py +++ b/opencompass/utils/datasets_info.py @@ -446,6 +446,11 @@ DATASETS_MAPPING = { "hf_id": "", "local": "./data/ChemBench4K", }, + "opencompass/medbullets": { + "ms_id": "", + "hf_id": "", + "local": "./opencompass/configs/datasets/Medbullets/data/medbullets.csv", + }, }