Fix bugs for MedQA. Add info to dataset-index

This commit is contained in:
Yejin0111 2025-05-08 14:41:15 +00:00
parent 63f80134c8
commit d28e3e4c80
5 changed files with 106 additions and 75 deletions

View File

@@ -122,6 +122,12 @@
paper: https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10778138
configpath: opencompass/configs/datasets/MedBench/medbench_gen.py
configpath_llmjudge: ''
- MedXpertQA:
name: MedQA
category: Knowledge / Medicine
paper: https://arxiv.org/abs/2009.13081
configpath: opencompass/configs/datasets/MedQA/MedQA_gen.py
configpath_llmjudge: opencompass/configs/datasets/MedQA/MedQA_llmjudge_gen.py
- MedXpertQA:
name: MedXpertQA
category: Knowledge / Medicine
@@ -739,6 +745,12 @@
paper: https://arxiv.org/pdf/1911.11641v1
configpath: opencompass/configs/datasets/piqa/piqa_gen.py
configpath_llmjudge: ''
- ProteinLMBench:
name: ProteinLMBench
category: Knowledge / Biology (Protein)
paper: https://arxiv.org/abs/2406.05540
configpath: opencompass/configs/datasets/ProteinLMBench/ProteinLMBench_gen.py
configpath_llmjudge: opencompass/configs/datasets/ProteinLMBench/ProteinLMBench_llmjudge_gen.py
- py150:
name: py150
category: Code

View File

@@ -5,37 +5,59 @@ from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.utils.text_postprocessors import first_option_postprocess
from opencompass.datasets.MedQA import MedQADataset
QUERY_TEMPLATE = """
Answer the following multiple choice question. The last line of your response should be of the following format: 'ANSWER: $LETTER' (without quotes) where LETTER is one of Options(e.g. one of ABCDEFGHIJKLMNOP). Think step by step before answering.
Question:\n
{question}
Options:\n
{choices}
""".strip()
MedQA_datasets = []
MedQA_reader_cfg = dict(
input_columns=['question', 'A', 'B', 'C', 'D', 'choices'],
input_columns=['question', 'choices'],
output_column='label',
test_split='validation')
)
MedQA_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(
role='HUMAN',
prompt='\nQuestion: {question}\n{choices}\nAnswer:'
)
], ),
dict(role='HUMAN', prompt=QUERY_TEMPLATE),
],
),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
MedQA_subsets = {
'US': 'xuxuxuxuxu/MedQA_US_test',
'Mainland': 'xuxuxuxuxu/MedQA_Mainland_test',
'Taiwan': 'xuxuxuxuxu/MedQA_Taiwan_test',
}
for split in list(MedQA_subsets.keys()):
MedQA_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type=first_option_postprocess, options='ABCD')
)
MedQA_datasets = [
MedQA_datasets.append(
dict(
abbr='MedQA',
abbr=f'MedQA_{split}',
type=MedQADataset,
path='opencompass/MedQA',
path=MedQA_subsets[split],
reader_cfg=MedQA_reader_cfg,
infer_cfg=MedQA_infer_cfg,
eval_cfg=MedQA_eval_cfg)
]
eval_cfg=MedQA_eval_cfg,
)
)

View File

@@ -43,9 +43,8 @@ GRADER_TEMPLATE = """
MedQA_datasets = []
MedQA_reader_cfg = dict(
input_columns=['question', 'A', 'B', 'C', 'D', 'choices'],
input_columns=['question', 'choices'],
output_column='label',
test_split='validation',
)
MedQA_infer_cfg = dict(
@@ -61,6 +60,14 @@ MedQA_infer_cfg = dict(
inferencer=dict(type=GenInferencer),
)
MedQA_subsets = {
'US': 'xuxuxuxuxu/MedQA_US_test',
'Mainland': 'xuxuxuxuxu/MedQA_Mainland_test',
'Taiwan': 'xuxuxuxuxu/MedQA_Taiwan_test',
}
for split in list(MedQA_subsets.keys()):
MedQA_eval_cfg = dict(
evaluator=dict(
type=GenericLLMEvaluator,
@@ -81,7 +88,7 @@ MedQA_eval_cfg = dict(
),
dataset_cfg=dict(
type=MedQADataset,
path='opencompass/MedQA',
path=MedQA_subsets[split],
reader_cfg=MedQA_reader_cfg,
),
judge_cfg=dict(),
@@ -91,9 +98,9 @@ MedQA_eval_cfg = dict(
MedQA_datasets.append(
dict(
abbr=f'MedQA',
abbr=f'MedQA_{split}',
type=MedQADataset,
path='opencompass/MedQA',
path=MedQA_subsets[split],
reader_cfg=MedQA_reader_cfg,
infer_cfg=MedQA_infer_cfg,
eval_cfg=MedQA_eval_cfg,

View File

@@ -1,4 +1,4 @@
from datasets import Dataset, DatasetDict, load_dataset
from datasets import Dataset, load_dataset
from opencompass.registry import LOAD_DATASET
@@ -11,18 +11,13 @@ class MedQADataset(BaseDataset):
@staticmethod
def load_single(path):
dataset = []
data_lines = load_dataset(path, 'test') # "data/MedQA"
num = 0
for data in data_lines:
num += 1
ds = load_dataset(path)
for data in ds['train']:
data['label'] = data['answer_idx']
choices = ''
for i in range(4):
data[chr(65 + i)] = data['ending' + str(i)]
choices += chr(65 + i) + '. ' + data['ending' + str(i)] + '\n'
data['question'] = data['sent1']
for option in data['options']:
choices += option + '. ' + data['options'][option] + '\n'
data['choices'] = choices
data['label'] = chr(65 + int(data['label'])) + '. ' + data[
'ending' + str(data['label'])]
dataset.append(data)
@@ -30,10 +25,5 @@ class MedQADataset(BaseDataset):
@staticmethod
def load(path):
train_dataset = Dataset.from_list([])
val_dataset = MedQADataset.load_single(path) # "data/MedQA/test.json"
dataset = DatasetDict({
'train': train_dataset,
'validation': val_dataset
})
dataset = MedQADataset.load_single(path)
return dataset