OpenCompass/opencompass/configs/datasets/SmolInstruct/smolinstruct_pp_acc_gen_8607a3.py
Linchen Xiao b2da1c08a8
[Dataset] Add SmolInstruct, Update Chembench (#2025)
* [Dataset] Add SmolInstruct, Update Chembench

* Add dataset metadata

* update

* update

* update
2025-04-18 17:21:29 +08:00

80 lines
3.2 KiB
Python

from opencompass.openicl import AccEvaluator
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import SmolInstructDataset
from opencompass.datasets.smolinstruct import smolinstruct_acc_postprocess
# Reader settings shared by every property-prediction accuracy dataset:
# the 'input' column is read as model input, 'output' as the reference
# answer, and 'validation' is named as the train split.
# NOTE(review): the train split is presumably the pool the retriever draws
# in-context examples from — confirm against the OpenCompass dataset reader.
pp_acc_reader_cfg = {
    'input_columns': ['input'],
    'output_column': 'output',
    'train_split': 'validation',
}
# Instruction text per property-prediction task, keyed by task name.
# Each hint is two lines: the task statement, then the reply-format rule
# (Yes/No only). The wording is emitted verbatim into the prompt, so it is
# kept exactly as-is.
pp_acc_hint_dict = {
    'BBBP': (
        'You are an expert chemist. Given the smiles representation of the '
        'compound, your task is to predict whether blood-brain barrier '
        'permeability (BBBP) is a property of the compound.\n'
        'The input contains the compound. Your reply should contain only '
        'Yes or No. Your reply must be valid and chemically reasonable.'
    ),
    'ClinTox': (
        'You are an expert chemist. Given the smiles representation of the '
        'compound, your task is to predict whether the compound is toxic.\n'
        'The input contains the compound. Your reply should contain only '
        'Yes or No. Your reply must be valid and chemically reasonable.'
    ),
    'HIV': (
        'You are an expert chemist. Given the smiles representation of the '
        'compound, your task is to predict whether the compound serve as an '
        'inhibitor of HIV replication.\n'
        'The input contains the compound. Your reply should contain only '
        'Yes or No. Your reply must be valid and chemically reasonable.'
    ),
    'SIDER': (
        'You are an expert chemist. Given the smiles representation of the '
        'compound, your task is to predict whether the compound has any '
        'side effects.\n'
        'The input contains the compound. Your reply should contain only '
        'Yes or No. Your reply must be valid and chemically reasonable.'
    ),
}
# Maps each task key used in this config to the value passed as the
# dataset's `name` argument when the dataset entry is built.
name_dict = dict(
    BBBP='property_prediction-bbbp',
    ClinTox='property_prediction-clintox',
    HIV='property_prediction-hiv',
    SIDER='property_prediction-sider',
)
# Assemble one dataset entry per task in pp_acc_hint_dict. Every entry uses
# the shared reader config, a generation inferencer, and accuracy scoring
# on the post-processed prediction.
pp_acc_datasets = []
for _task, _instruction in pp_acc_hint_dict.items():
    # The HUMAN turn is identical in the in-context example and in the final
    # query. The doubled braces keep `{input}` as a literal placeholder in
    # the resulting string.
    _question = f'{_instruction}\nQuestion: {{input}}\nAnswer: '

    pp_acc_infer_cfg = dict(
        # Template for an in-context example: the question followed by the
        # reference output.
        ice_template=dict(
            type=PromptTemplate,
            template=dict(
                round=[
                    dict(role='HUMAN', prompt=_question),
                    dict(role='BOT', prompt='{output}\n'),
                ],
            ),
        ),
        # Template for the actual query; '</E>' is declared as the ice_token
        # and placed at the very beginning of the prompt.
        prompt_template=dict(
            type=PromptTemplate,
            template=dict(
                begin='</E>',
                round=[
                    dict(role='HUMAN', prompt=_question),
                ],
            ),
            ice_token='</E>',
        ),
        # NOTE(review): fix_id_list=[0] presumably pins the single in-context
        # example to index 0 of the train split — confirm in FixKRetriever.
        retriever=dict(type=FixKRetriever, fix_id_list=[0]),
        inferencer=dict(type=GenInferencer),
    )

    pp_acc_eval_cfg = dict(
        evaluator=dict(type=AccEvaluator),
        pred_postprocessor=dict(type=smolinstruct_acc_postprocess),
    )

    _entry = dict(
        abbr=f'PP-{_task}',
        type=SmolInstructDataset,
        path='osunlp/SMolInstruct',
        name=name_dict[_task],
        reader_cfg=pp_acc_reader_cfg,
        infer_cfg=pp_acc_infer_cfg,
        eval_cfg=pp_acc_eval_cfg,
    )
    pp_acc_datasets.append(_entry)

# Drop the loop temporaries so they are not left behind as module-level names.
del _task, _instruction, _question, _entry