[Feature] Add ChemBench (#1032)

* add ChemBench
* update results
* molbench -> ChemBench

---------

Co-authored-by: Leymore <zfz-960727@163.com>
2025-05-30 16:03:24 +08:00 · 2024-04-12 08:46:26 +08:00 · 2024-04-12 08:46:26 +08:00 · a00e57296f
commit a00e57296f
parent bd7c11bb89
4 changed files with 134 additions and 0 deletions
--- a/configs/datasets/ChemBench/ChemBench_gen.py
+++ b/configs/datasets/ChemBench/ChemBench_gen.py
@@ -0,0 +1,77 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import FixKRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import ChemBenchDataset
+from opencompass.utils.text_postprocessors import first_capital_postprocess
+
+
+# Reader settings: prompt input columns, answer column, and train split.
+chembench_reader_cfg = {
+    'input_columns': ['input', 'A', 'B', 'C', 'D'],
+    'output_column': 'target',
+    'train_split': 'dev',
+}
+
+# ChemBench subset names. The loop below builds one dataset config per name
+# (abbr ``ChemBench_<name>``).
+chembench_all_sets = [
+    'Name_Conversion', 'Property_Prediction',
+    'Mol2caption', 'Caption2mol',
+    'Product_Prediction', 'Retrosynthesis',
+    'Yield_Prediction', 'Temperature_Prediction',
+    'Solvent_Prediction',
+]
+
+
+# Build one dataset config per subset; all share chembench_reader_cfg.
+chembench_datasets = []
+for _name in chembench_all_sets:
+    # The hint is identical for every subset. A subset-specific variant would
+    # say `{_name.replace("_", " ")}` in place of the word "chemistry".
+    _hint = 'There is a single choice question about chemistry. Answer the question by replying A, B, C or D.'
+    # Question text used verbatim by both ice_template and prompt_template;
+    # the doubled braces leave {input}, {A}..{D} as template placeholders.
+    _prompt = f'{_hint}\nQuestion: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nAnswer: '
+
+    chembench_infer_cfg = {
+        # HUMAN turn asks the question, BOT turn is filled from {target}.
+        'ice_template': {
+            'type': PromptTemplate,
+            'template': {
+                'round': [
+                    {'role': 'HUMAN', 'prompt': _prompt},
+                    {'role': 'BOT', 'prompt': '{target}\n'},
+                ],
+            },
+        },
+        # Same HUMAN turn, preceded by "</E>", which is also the ice_token.
+        'prompt_template': {
+            'type': PromptTemplate,
+            'template': {
+                'begin': '</E>',
+                'round': [{'role': 'HUMAN', 'prompt': _prompt}],
+            },
+            'ice_token': '</E>',
+        },
+        'retriever': {'type': FixKRetriever, 'fix_id_list': [0, 1, 2, 3, 4]},
+        'inferencer': {'type': GenInferencer},
+    }
+
+    # Evaluator and the post-processor configured for predictions.
+    chembench_eval_cfg = {
+        'evaluator': {'type': AccEvaluator},
+        'pred_postprocessor': {'type': first_capital_postprocess},
+    }
+
+    chembench_datasets.append({
+        'abbr': f'ChemBench_{_name}',
+        'type': ChemBenchDataset,
+        'path': './data/ChemBench/',
+        'name': _name,
+        'reader_cfg': chembench_reader_cfg,
+        'infer_cfg': chembench_infer_cfg,
+        'eval_cfg': chembench_eval_cfg,
+    })
+
+del _name, _hint, _prompt  # keep loop temporaries out of the module namespace
--- a/configs/eval_chembench.py
+++ b/configs/eval_chembench.py
@@ -0,0 +1,22 @@
+from mmengine.config import read_base
+
+with read_base():
+    from .datasets.ChemBench.ChemBench_gen import chembench_datasets
+    from .models.mistral.hf_mistral_7b_instruct_v0_2 import models
+
+datasets = list(chembench_datasets)  # new list built from the imported configs
+models = list(models)  # rebinds the imported name to a fresh list
+
+'''
+dataset                           version    metric    mode      mistral-7b-instruct-v0.2-hf
+--------------------------------  ---------  --------  ------  -----------------------------
+ChemBench_Name_Conversion         d4e6a1     accuracy  gen                             45.43
+ChemBench_Property_Prediction     d4e6a1     accuracy  gen                             47.11
+ChemBench_Mol2caption             d4e6a1     accuracy  gen                             64.21
+ChemBench_Caption2mol             d4e6a1     accuracy  gen                             35.38
+ChemBench_Product_Prediction      d4e6a1     accuracy  gen                             38.67
+ChemBench_Retrosynthesis          d4e6a1     accuracy  gen                             27
+ChemBench_Yield_Prediction        d4e6a1     accuracy  gen                             27
+ChemBench_Temperature_Prediction  d4e6a1     accuracy  gen                             26.73
+ChemBench_Solvent_Prediction      d4e6a1     accuracy  gen                             32.67
+'''
--- a/opencompass/datasets/__init__.py
+++ b/opencompass/datasets/__init__.py
@@ -12,6 +12,7 @@ from .bustum import *  # noqa: F401, F403
 from .c3 import *  # noqa: F401, F403
 from .cb import *  # noqa: F401, F403
 from .ceval import *  # noqa: F401, F403
+from .chembench import *  # noqa: F401, F403
 from .chid import *  # noqa: F401, F403
 from .cibench import *  # noqa: F401, F403
 from .circular import *  # noqa: F401, F403
--- a/opencompass/datasets/chembench.py
+++ b/opencompass/datasets/chembench.py
@@ -0,0 +1,34 @@
+import json
+import os.path as osp
+
+from datasets import Dataset, DatasetDict
+
+from opencompass.registry import LOAD_DATASET
+
+from .base import BaseDataset
+
+
+@LOAD_DATASET.register_module()
+class ChemBenchDataset(BaseDataset):
+    """ChemBench multiple-choice data, stored as one JSON file per split."""
+
+    @staticmethod
+    def load(path: str, name: str):
+        """Load the ``dev`` and ``test`` splits of subset ``name``.
+
+        Reads ``{path}/{split}/{name}_benchmark.json`` for each split and
+        returns a ``DatasetDict`` with columns input, A, B, C, D and target.
+        """
+        # Output column -> key of the raw JSON record (order is kept).
+        fields = dict(input='question', A='A', B='B', C='C', D='D',
+                      target='answer')
+
+        splits = DatasetDict()
+        for split_name in ('dev', 'test'):
+            json_path = osp.join(path, split_name, f'{name}_benchmark.json')
+            with open(json_path, 'r', encoding='utf-8') as fp:
+                records = json.load(fp)
+            rows = [{col: rec[key] for col, key in fields.items()}
+                    for rec in records]
+            splits[split_name] = Dataset.from_list(rows)
+        return splits