mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
revise class name & remove csv file & add dataset-index.yml info
This commit is contained in:
parent
e7b04afa3c
commit
adc33cd4f8
@ -1023,3 +1023,9 @@
|
|||||||
paper: https://arxiv.org/pdf/2402.09391
|
paper: https://arxiv.org/pdf/2402.09391
|
||||||
configpath: opencompass/configs/datasets/SmolInstruct/smolinstruct_gen.py
|
configpath: opencompass/configs/datasets/SmolInstruct/smolinstruct_gen.py
|
||||||
configpath_llmjudge: ''
|
configpath_llmjudge: ''
|
||||||
|
- nejmaibench:
|
||||||
|
name: nejmaibench
|
||||||
|
category: Science /Medicine
|
||||||
|
paper: https://arxiv.org/pdf/2308.04709
|
||||||
|
configpath: opencompass/configs/datasets/nejm_ai_benchmark/nejmaibench_gen.py
|
||||||
|
configpath_llmjudge: opencompass/configs/datasets/nejm_ai_benchmark/nejmaibench_llmjudge_gen.py
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -1,4 +1,4 @@
|
|||||||
from opencompass.datasets import nejmaibenchDataset, nejmaibenchEvaluator
|
from opencompass.datasets import NejmaibenchDataset, NejmaibenchEvaluator
|
||||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||||
@ -8,14 +8,6 @@ import os
|
|||||||
SYSTEM_PROMPT = 'You are a helpful medical assistant.\n\n' # Where to put this?
|
SYSTEM_PROMPT = 'You are a helpful medical assistant.\n\n' # Where to put this?
|
||||||
ZERO_SHOT_PROMPT = 'Q: {question}\n Please select the correct answer from the options above and output only the corresponding letter (A, B, C, D, or E) without any explanation or additional text.\n'
|
ZERO_SHOT_PROMPT = 'Q: {question}\n Please select the correct answer from the options above and output only the corresponding letter (A, B, C, D, or E) without any explanation or additional text.\n'
|
||||||
|
|
||||||
# 将相对于当前文件的相对路径转换为绝对路径
|
|
||||||
def to_abs_path(relative_path: str) -> str:
|
|
||||||
# 当前脚本所在目录
|
|
||||||
base_dir = os.path.dirname(os.path.abspath(__file__))
|
|
||||||
# 拼接并规范化绝对路径
|
|
||||||
abs_path = os.path.abspath(os.path.join(base_dir, relative_path))
|
|
||||||
return abs_path
|
|
||||||
|
|
||||||
# Reader configuration
|
# Reader configuration
|
||||||
reader_cfg = dict(
|
reader_cfg = dict(
|
||||||
input_columns=[
|
input_columns=[
|
||||||
@ -50,13 +42,13 @@ infer_cfg = dict(
|
|||||||
|
|
||||||
# Evaluation configuration
|
# Evaluation configuration
|
||||||
eval_cfg = dict(
|
eval_cfg = dict(
|
||||||
evaluator=dict(type=nejmaibenchEvaluator),
|
evaluator=dict(type=NejmaibenchEvaluator),
|
||||||
pred_role='BOT',
|
pred_role='BOT',
|
||||||
)
|
)
|
||||||
nejmaibench_dataset = dict(
|
nejmaibench_dataset = dict(
|
||||||
type=nejmaibenchDataset,
|
type=NejmaibenchDataset,
|
||||||
abbr='nejmaibench',
|
abbr='nejmaibench',
|
||||||
path=to_abs_path('data/NEJM_All_Questions_And_Answers.csv'),
|
path='opencompass/nejmaibench',
|
||||||
prompt_mode='zero-shot',
|
prompt_mode='zero-shot',
|
||||||
reader_cfg=reader_cfg,
|
reader_cfg=reader_cfg,
|
||||||
infer_cfg=infer_cfg,
|
infer_cfg=infer_cfg,
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
from opencompass.datasets import nejmaibenchDataset, nejmaibench_llmjudge_postprocess
|
from opencompass.datasets import NejmaibenchDataset, nejmaibench_llmjudge_postprocess
|
||||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||||
@ -29,15 +29,6 @@ GRADER_TEMPLATE = """
|
|||||||
Judging the correctness of candidates' answers:
|
Judging the correctness of candidates' answers:
|
||||||
""".strip()
|
""".strip()
|
||||||
|
|
||||||
|
|
||||||
# 将相对于当前文件的相对路径转换为绝对路径
|
|
||||||
def to_abs_path(relative_path: str) -> str:
|
|
||||||
# 当前脚本所在目录
|
|
||||||
base_dir = os.path.dirname(os.path.abspath(__file__))
|
|
||||||
# 拼接并规范化绝对路径
|
|
||||||
abs_path = os.path.abspath(os.path.join(base_dir, relative_path))
|
|
||||||
return abs_path
|
|
||||||
|
|
||||||
# Reader configuration
|
# Reader configuration
|
||||||
reader_cfg = dict(
|
reader_cfg = dict(
|
||||||
input_columns=[
|
input_columns=[
|
||||||
@ -91,8 +82,8 @@ eval_cfg = dict(
|
|||||||
),
|
),
|
||||||
),
|
),
|
||||||
dataset_cfg=dict(
|
dataset_cfg=dict(
|
||||||
type=nejmaibenchDataset,
|
type=NejmaibenchDataset,
|
||||||
path=to_abs_path('data/NEJM_All_Questions_And_Answers.csv'),
|
path='opencompass/nejmaibench',
|
||||||
prompt_mode='zero-shot',
|
prompt_mode='zero-shot',
|
||||||
reader_cfg=reader_cfg,
|
reader_cfg=reader_cfg,
|
||||||
),
|
),
|
||||||
@ -103,9 +94,9 @@ eval_cfg = dict(
|
|||||||
|
|
||||||
|
|
||||||
nejmaibench_dataset = dict(
|
nejmaibench_dataset = dict(
|
||||||
type=nejmaibenchDataset,
|
type=NejmaibenchDataset,
|
||||||
abbr='nejmaibench',
|
abbr='nejmaibench',
|
||||||
path=to_abs_path('data/NEJM_All_Questions_And_Answers.csv'),
|
path='opencompass/nejmaibench',
|
||||||
prompt_mode='zero-shot',
|
prompt_mode='zero-shot',
|
||||||
reader_cfg=reader_cfg,
|
reader_cfg=reader_cfg,
|
||||||
infer_cfg=infer_cfg,
|
infer_cfg=infer_cfg,
|
||||||
|
@ -5,7 +5,7 @@ from datasets import Dataset
|
|||||||
|
|
||||||
from opencompass.openicl import BaseEvaluator
|
from opencompass.openicl import BaseEvaluator
|
||||||
from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS
|
from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS
|
||||||
from opencompass.utils import get_logger
|
from opencompass.utils import get_data_path, get_logger
|
||||||
|
|
||||||
from .base import BaseDataset
|
from .base import BaseDataset
|
||||||
|
|
||||||
@ -39,11 +39,12 @@ def _parse(item, prompt_mode):
|
|||||||
|
|
||||||
|
|
||||||
@LOAD_DATASET.register_module()
|
@LOAD_DATASET.register_module()
|
||||||
class nejmaibenchDataset(BaseDataset):
|
class NejmaibenchDataset(BaseDataset):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def load(path: str, prompt_mode: str = 'zero-shot', **kwargs):
|
def load(path: str, prompt_mode: str = 'zero-shot', **kwargs):
|
||||||
# 读取 CSV 文件为 DataFrame,并将 NaN 转为空字符串
|
# 读取 CSV 文件为 DataFrame,并将 NaN 转为空字符串
|
||||||
|
path = get_data_path(path)
|
||||||
df = pd.read_csv(path, encoding='utf-8')
|
df = pd.read_csv(path, encoding='utf-8')
|
||||||
df = df.fillna('')
|
df = df.fillna('')
|
||||||
|
|
||||||
@ -61,7 +62,7 @@ class nejmaibenchDataset(BaseDataset):
|
|||||||
return dataset
|
return dataset
|
||||||
|
|
||||||
|
|
||||||
class nejmaibenchEvaluator(BaseEvaluator):
|
class NejmaibenchEvaluator(BaseEvaluator):
|
||||||
|
|
||||||
def score(self, predictions, references, test_set):
|
def score(self, predictions, references, test_set):
|
||||||
method = test_set['prompt_mode'][0]
|
method = test_set['prompt_mode'][0]
|
||||||
|
@ -446,6 +446,11 @@ DATASETS_MAPPING = {
|
|||||||
"hf_id": "",
|
"hf_id": "",
|
||||||
"local": "./data/ChemBench4K",
|
"local": "./data/ChemBench4K",
|
||||||
},
|
},
|
||||||
|
"opencompass/nejmaibench": {
|
||||||
|
"ms_id": "",
|
||||||
|
"hf_id": "SeanWu25/NEJM-AI_Benchmarking_Medical_Language_Models",
|
||||||
|
"local": "./opencompass/configs/datasets/nejm_ai_benchmark/data/NEJM_All_Questions_And_Answers.csv",
|
||||||
|
},
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user