Mirror of https://github.com/open-compass/opencompass.git (synced 2025-05-30 16:03:24 +08:00)

Commit d28e3e4c80 (parent 63f80134c8)

Fix bugs for MedQA. Add info in dataset-index
dataset-index.yml
@@ -122,6 +122,12 @@
     paper: https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10778138
     configpath: opencompass/configs/datasets/MedBench/medbench_gen.py
     configpath_llmjudge: ''
+- MedQA:
+    name: MedQA
+    category: Knowledge / Medicine
+    paper: https://arxiv.org/abs/2009.13081
+    configpath: opencompass/configs/datasets/MedQA/MedQA_gen.py
+    configpath_llmjudge: opencompass/configs/datasets/MedQA/MedQA_llmjudge_gen.py
 - MedXpertQA:
     name: MedXpertQA
     category: Knowledge / Medicine
@@ -739,6 +745,12 @@
     paper: https://arxiv.org/pdf/1911.11641v1
     configpath: opencompass/configs/datasets/piqa/piqa_gen.py
     configpath_llmjudge: ''
+- ProteinLMBench:
+    name: ProteinLMBench
+    category: Knowledge / Biology (Protein)
+    paper: https://arxiv.org/abs/2406.05540
+    configpath: opencompass/configs/datasets/ProteinLMBench/ProteinLMBench_gen.py
+    configpath_llmjudge: opencompass/configs/datasets/ProteinLMBench/ProteinLMBench_llmjudge_gen.py
 - py150:
     name: py150
     category: Code
opencompass/configs/datasets/MedQA/MedQA_gen.py
@@ -5,37 +5,59 @@ from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.utils.text_postprocessors import first_option_postprocess
 from opencompass.datasets.MedQA import MedQADataset
 
 
+QUERY_TEMPLATE = """
+Answer the following multiple choice question. The last line of your response should be of the following format: 'ANSWER: $LETTER' (without quotes) where LETTER is one of Options(e.g. one of ABCDEFGHIJKLMNOP). Think step by step before answering.
+
+Question:\n
+{question}
+
+Options:\n
+{choices}
+
+""".strip()
+
+
 MedQA_datasets = []
 
 MedQA_reader_cfg = dict(
-    input_columns=['question', 'A', 'B', 'C', 'D', 'choices'],
+    input_columns=['question', 'choices'],
     output_column='label',
-    test_split='validation')
+)
 
 MedQA_infer_cfg = dict(
     prompt_template=dict(
         type=PromptTemplate,
         template=dict(
             round=[
-            dict(
-                role='HUMAN',
-                prompt='\nQuestion: {question}\n{choices}\nAnswer:'
-            )
-        ], ),
+                dict(role='HUMAN', prompt=QUERY_TEMPLATE),
+            ],
+        ),
     ),
     retriever=dict(type=ZeroRetriever),
     inferencer=dict(type=GenInferencer),
 )
 
+MedQA_subsets = {
+    'US': 'xuxuxuxuxu/MedQA_US_test',
+    'Mainland': 'xuxuxuxuxu/MedQA_Mainland_test',
+    'Taiwan': 'xuxuxuxuxu/MedQA_Taiwan_test',
+}
+
+for split in list(MedQA_subsets.keys()):
+
     MedQA_eval_cfg = dict(
         evaluator=dict(type=AccEvaluator),
         pred_postprocessor=dict(type=first_option_postprocess, options='ABCD')
     )
 
-MedQA_datasets = [
-    dict(
-        abbr='MedQA',
+    MedQA_datasets.append(
+        dict(
+            abbr=f'MedQA_{split}',
             type=MedQADataset,
-        path='opencompass/MedQA',
+            path=MedQA_subsets[split],
             reader_cfg=MedQA_reader_cfg,
             infer_cfg=MedQA_infer_cfg,
-        eval_cfg=MedQA_eval_cfg)
-]
+            eval_cfg=MedQA_eval_cfg,
+        )
+    )
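After this change, importing the config yields three dataset entries (one per key in MedQA_subsets) instead of a single MedQA entry. Below is a minimal sketch of how the list might be pulled into a top-level evaluation config; the file name eval_medqa.py and the package-style import path are illustrative assumptions, only MedQA_datasets comes from the diff above.

```python
# eval_medqa.py -- hypothetical top-level config, not part of this commit.
from mmengine.config import read_base

with read_base():
    # Brings in the three per-subset entries built by the loop above:
    # MedQA_US, MedQA_Mainland, MedQA_Taiwan.
    from opencompass.configs.datasets.MedQA.MedQA_gen import MedQA_datasets

datasets = [*MedQA_datasets]
# models = [...]  # add model configs, then launch as usual, e.g. `python run.py eval_medqa.py`
```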
opencompass/configs/datasets/MedQA/MedQA_llmjudge_gen.py
@@ -43,9 +43,8 @@ GRADER_TEMPLATE = """
 MedQA_datasets = []
 
 MedQA_reader_cfg = dict(
-    input_columns=['question', 'A', 'B', 'C', 'D', 'choices'],
+    input_columns=['question', 'choices'],
     output_column='label',
-    test_split='validation',
 )
 
 MedQA_infer_cfg = dict(
@@ -61,6 +60,14 @@ MedQA_infer_cfg = dict(
     inferencer=dict(type=GenInferencer),
 )
 
+MedQA_subsets = {
+    'US': 'xuxuxuxuxu/MedQA_US_test',
+    'Mainland': 'xuxuxuxuxu/MedQA_Mainland_test',
+    'Taiwan': 'xuxuxuxuxu/MedQA_Taiwan_test',
+}
+
+for split in list(MedQA_subsets.keys()):
+
     MedQA_eval_cfg = dict(
         evaluator=dict(
             type=GenericLLMEvaluator,
@@ -81,7 +88,7 @@ MedQA_eval_cfg = dict(
             ),
             dataset_cfg=dict(
                 type=MedQADataset,
-                path='opencompass/MedQA',
+                path=MedQA_subsets[split],
                 reader_cfg=MedQA_reader_cfg,
             ),
             judge_cfg=dict(),
@@ -91,9 +98,9 @@ MedQA_eval_cfg = dict(
 
     MedQA_datasets.append(
         dict(
-            abbr=f'MedQA',
+            abbr=f'MedQA_{split}',
             type=MedQADataset,
-            path='opencompass/MedQA',
+            path=MedQA_subsets[split],
             reader_cfg=MedQA_reader_cfg,
             infer_cfg=MedQA_infer_cfg,
             eval_cfg=MedQA_eval_cfg,
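The LLM-judge variant builds the same three entries but keeps judge_cfg=dict() empty, so a judge model still has to be supplied when the evaluation is assembled. A hypothetical sketch of filling it in after import is shown below; judge_model_cfg is a placeholder, and the dictionary path simply follows the eval_cfg structure in the hunk above (import path assumed as in the earlier sketch).

```python
from mmengine.config import read_base

with read_base():
    from opencompass.configs.datasets.MedQA.MedQA_llmjudge_gen import MedQA_datasets

judge_model_cfg = dict()  # placeholder: substitute a real judge-model config here

for d in MedQA_datasets:
    # eval_cfg -> evaluator -> judge_cfg mirrors the GenericLLMEvaluator config above.
    d['eval_cfg']['evaluator']['judge_cfg'] = judge_model_cfg

datasets = [d for d in MedQA_datasets if d['abbr'] == 'MedQA_US']  # e.g. evaluate the US subset only
```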
opencompass/datasets/MedQA.py
@@ -1,4 +1,4 @@
-from datasets import Dataset, DatasetDict, load_dataset
+from datasets import Dataset, load_dataset
 
 from opencompass.registry import LOAD_DATASET
 
@@ -11,18 +11,13 @@ class MedQADataset(BaseDataset):
     @staticmethod
     def load_single(path):
         dataset = []
-        data_lines = load_dataset(path, 'test')  # "data/MedQA"
-        num = 0
-        for data in data_lines:
-            num += 1
+        ds = load_dataset(path)
+        for data in ds['train']:
+            data['label'] = data['answer_idx']
             choices = ''
-            for i in range(4):
-                data[chr(65 + i)] = data['ending' + str(i)]
-                choices += chr(65 + i) + '. ' + data['ending' + str(i)] + '\n'
-            data['question'] = data['sent1']
+            for option in data['options']:
+                choices += option + '. ' + data['options'][option] + '\n'
             data['choices'] = choices
-            data['label'] = chr(65 + int(data['label'])) + '. ' + data[
-                'ending' + str(data['label'])]
 
             dataset.append(data)
 
@@ -30,10 +25,5 @@ class MedQADataset(BaseDataset):
 
     @staticmethod
     def load(path):
-        train_dataset = Dataset.from_list([])
-        val_dataset = MedQADataset.load_single(path)  # "data/MedQA/test.json"
-        dataset = DatasetDict({
-            'train': train_dataset,
-            'validation': val_dataset
-        })
+        dataset = MedQADataset.load_single(path)
         return dataset
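The rewritten load_single assumes each record in the xuxuxuxuxu/MedQA_*_test subsets already carries a question, an options dict keyed by letter, and an answer_idx; the loop flattens options into the choices string and copies answer_idx into label, which is what input_columns=['question', 'choices'] and output_column='label' in the configs above refer to. A toy reproduction of that transformation on a made-up record:

```python
# Made-up record using the field names the new loader relies on; the content is invented.
record = {
    'question': 'Which vitamin deficiency causes scurvy?',
    'options': {'A': 'Vitamin A', 'B': 'Vitamin B12', 'C': 'Vitamin C', 'D': 'Vitamin D'},
    'answer_idx': 'C',
}

# Same per-record transformation as the loop in load_single above.
record['label'] = record['answer_idx']
choices = ''
for option in record['options']:
    choices += option + '. ' + record['options'][option] + '\n'
record['choices'] = choices

print(record['choices'])  # A. Vitamin A\nB. Vitamin B12\nC. Vitamin C\nD. Vitamin D\n
print(record['label'])    # C
```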