From 6527fdf70ae3cc5eb16740bbc655cfe67d0c0181 Mon Sep 17 00:00:00 2001
From: weixingjian <weixingjian@pjlab.org.cn>
Date: Tue, 21 Jan 2025 20:23:27 +0800
Subject: [PATCH] add HuMatchingFIB under new paradigm

---
 examples/eval_fib1.py                         |  11 ++
 .../OpenHuEval/HuMatchingFIB/HuMatchingFIB.py |  50 ++++++++
 .../HuMatchingFIB/HuMatchingFIB_setting.py    |  34 +++++
 .../OpenHuEval/HuMatchingFIB/__init__.py      |   1 +
 .../datasets/OpenHuEval/HuMatchingFIB.py      | 120 ++++++++++++++++++
 opencompass/datasets/OpenHuEval/__init__.py   |   1 +
 opencompass/datasets/__init__.py              |   1 +
 7 files changed, 218 insertions(+)
 create mode 100644 examples/eval_fib1.py
 create mode 100644 opencompass/configs/datasets/OpenHuEval/HuMatchingFIB/HuMatchingFIB.py
 create mode 100644 opencompass/configs/datasets/OpenHuEval/HuMatchingFIB/HuMatchingFIB_setting.py
 create mode 100644 opencompass/configs/datasets/OpenHuEval/HuMatchingFIB/__init__.py
 create mode 100644 opencompass/datasets/OpenHuEval/HuMatchingFIB.py
 create mode 100644 opencompass/datasets/OpenHuEval/__init__.py
diff --git a/examples/eval_fib1.py b/examples/eval_fib1.py
new file mode 100644
index 00000000..0e1f68c0
--- /dev/null
+++ b/examples/eval_fib1.py
@@ -0,0 +1,11 @@
+from mmengine.config import read_base
+# read_base() is mmengine's config-import context; the imports inside it pull in a dataset list and a model list.
+with read_base():
+    from opencompass.configs.datasets.OpenHuEval.HuMatchingFIB.HuMatchingFIB import FIB1_datasets
+
+    from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_7b_chat import models as lmdeploy_internlm2_5_7b_chat_model
+
+# `models` concatenates every local whose name ends in '_model'; each such value must be a list.
+datasets = FIB1_datasets
+models = sum([v for k, v in locals().items() if k.endswith('_model')], [])
+work_dir = './outputs/' + __file__.split('/')[-1].split('.')[0] + '/' # do NOT modify this line, yapf: disable, pylint: disable
diff --git a/opencompass/configs/datasets/OpenHuEval/HuMatchingFIB/HuMatchingFIB.py b/opencompass/configs/datasets/OpenHuEval/HuMatchingFIB/HuMatchingFIB.py
new file mode 100644
index 00000000..0149d615
--- /dev/null
+++ b/opencompass/configs/datasets/OpenHuEval/HuMatchingFIB/HuMatchingFIB.py
@@ -0,0 +1,50 @@
+from mmengine.config import read_base
+
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets.OpenHuEval.HuMatchingFIB import HuMatchingFIBDataset, HuMatchingFIBEvaluator
+
+with read_base():
+    from .HuMatchingFIB_setting import INSTRUCTIONS, DATASET_PATH
+
+ALL_LANGUAGES = ['hu']
+PROMPT_VERSION = INSTRUCTIONS['version']
+# Reader config: 'question' and 'subject' are the input columns, 'reference' is the output column.
+FIB1_reader_cfg = dict(input_columns=['question', 'subject'],
+                         output_column='reference')
+# Build one dataset entry per language code listed in ALL_LANGUAGES.
+FIB1_datasets = []
+for lan in ALL_LANGUAGES:
+    instruction = INSTRUCTIONS[lan]
+    FIB1_infer_cfg = dict(
+        prompt_template=dict(
+            type=PromptTemplate,
+            template=dict(
+                begin='</E>',
+                round=[
+                    dict(
+                        role='HUMAN',
+                        prompt=instruction
+                    ),
+                ],
+            ),
+            ice_token='</E>',
+        ),
+        retriever=dict(type=ZeroRetriever),
+        inferencer=dict(type=GenInferencer),
+    )
+
+    FIB1_eval_cfg = dict(evaluator=dict(type=HuMatchingFIBEvaluator))
+    # NOTE(review): abbr hard-codes 'nkp_FIB1_humanities' and '1shot' - confirm they match the data and prompt.
+    FIB1_datasets.append(
+        dict(
+            abbr=f'nkp_FIB1_humanities-{lan}-1shot-{PROMPT_VERSION}',
+            type=HuMatchingFIBDataset,
+            path=DATASET_PATH,
+            lan=lan,
+            reader_cfg=FIB1_reader_cfg,
+            infer_cfg=FIB1_infer_cfg,
+            eval_cfg=FIB1_eval_cfg,
+        )
+    )
diff --git a/opencompass/configs/datasets/OpenHuEval/HuMatchingFIB/HuMatchingFIB_setting.py b/opencompass/configs/datasets/OpenHuEval/HuMatchingFIB/HuMatchingFIB_setting.py
new file mode 100644
index 00000000..cbed1807
--- /dev/null
+++ b/opencompass/configs/datasets/OpenHuEval/HuMatchingFIB/HuMatchingFIB_setting.py
@@ -0,0 +1,34 @@
+# INSTRUCTIONS = {
+#     'hu': """
+#     The following question is in hungarian language on {subject}, please read the question, and try to fill in the blank by chosing appropriate option from the option list. Please organize the answer in a list. An example:
+#     {
+#         "q_main": "Egészítsd ki a Janus Pannonius életére vonatkozó rövid szöveget! Segítségként használd az internetet! Vigyázz, nem minden szót kell felhasználnod!\nJanus Pannonius nem csupán költőként volt jelentős személyisége kora Magyarországának. #0# unokaöccseként a politikából is hamar kivette a részét. #1# tanulmányai után pécsi #2# lett, majd a királyné mellett #3#. Főkincstartóként és a #4# báni cím elnyerésével komoly politikai karriert futott be Mátyás király udvarában. A királlyal megromló kapcsolata miatt részt vett a #5# elleni összeesküvésben, ezért menekülnie kellett. Ez, és az akkor már súlyosbodó betegsége okozta halálát #6#.",
+#         "options": ["A.érsek", "B.szlavón", "C.Vitéz János", "D.püspök", "E.főpohárnok", "F.Ulászló", "G.1474-ben", "H.főkancellár", "I.Itáliai", "J.Kinizsi Pál", "K.Kálmán", "L.1472-ben", "M.Prágai", "N.Mátyás"],
+#         "std_ans": ["#0#C", "#1#I", "#2#D", "#3#H", "#4#B", "#5#N", "#6#L"],
+#     }
+#     Now try to answer the following question, your response should be in a JSON format. Contain the std_ans like the case given above.
+#     The question is: {question}.
+#     """,
+#     'version':'V1',
+#     'description': 'Initial version, using 1shot, incontext, #0# as place holder, output in JSON format',
+# }
+# Active prompt set (V2), keyed by language code plus 'version' and 'description' metadata.
+INSTRUCTIONS = {
+    'hu': """
+    You are a native hungarian teacher. The following question is in hungarian language on {subject}. Please read the question, and You need to choose the appropriate option from the provided "option" list to fill in each blanks in the text based on the context. Read the entire text, then fill in the blanks. Some options can be selected repeatedly. Please organize the answer in a list. An example:
+    {
+        "q_main": "Egészítsd ki a Janus Pannonius életére vonatkozó rövid szöveget! Segítségként használd az internetet! Vigyázz, nem minden szót kell felhasználnod!\nJanus Pannonius nem csupán költőként volt jelentős személyisége kora Magyarországának. #0# unokaöccseként a politikából is hamar kivette a részét. #1# tanulmányai után pécsi #2# lett, majd a királyné mellett #3#. Főkincstartóként és a #4# báni cím elnyerésével komoly politikai karriert futott be Mátyás király udvarában. A királlyal megromló kapcsolata miatt részt vett a #5# elleni összeesküvésben, ezért menekülnie kellett. Ez, és az akkor már súlyosbodó betegsége okozta halálát #6#.",
+        "options": ["A.érsek", "B.szlavón", "C.Vitéz János", "D.püspök", "E.főpohárnok", "F.Ulászló", "G.1474-ben", "H.főkancellár", "I.Itáliai", "J.Kinizsi Pál", "K.Kálmán", "L.1472-ben", "M.Prágai", "N.Mátyás"],
+    },
+    The answer is:
+    {
+        "std_ans": ["#0#C", "#1#I", "#2#D", "#3#H", "#4#B", "#5#N", "#6#L"]
+    }
+    Now try to answer the following question, your response should be in a JSON format. Contain the std_ans like the case given above.
+    The question is: {question}.
+    """,
+    'version':'V2',
+    'description': 'Version 2, using 1shot, more incontext, "#0#" as place holder, output in JSON format'
+}
+# NOTE(review): absolute path that looks machine-specific - confirm how other environments are meant to override it.
+DATASET_PATH = "/mnt/hwfile/opendatalab/weixingjian/test/"
diff --git a/opencompass/configs/datasets/OpenHuEval/HuMatchingFIB/__init__.py b/opencompass/configs/datasets/OpenHuEval/HuMatchingFIB/__init__.py
new file mode 100644
index 00000000..ad5f002e
--- /dev/null
+++ b/opencompass/configs/datasets/OpenHuEval/HuMatchingFIB/__init__.py
@@ -0,0 +1 @@
+from .HuMatchingFIB import *  # noqa: F401, F403
\ No newline at end of file
diff --git a/opencompass/datasets/OpenHuEval/HuMatchingFIB.py b/opencompass/datasets/OpenHuEval/HuMatchingFIB.py
new file mode 100644
index 00000000..2d2044f1
--- /dev/null
+++ b/opencompass/datasets/OpenHuEval/HuMatchingFIB.py
@@ -0,0 +1,120 @@
+import json
+import os
+import re
+
+from datasets import Dataset, DatasetDict
+from opencompass.openicl.icl_evaluator import BaseEvaluator
+from ..base import BaseDataset
+
+
+class HuMatchingFIBDataset(BaseDataset):
+    """Loader for the HuMatchingFIB matching fill-in-the-blank JSONL data."""
+
+    @staticmethod
+    def load(**kwargs):
+        """Build a ``Dataset`` from the first JSONL file found under ``path``.
+
+        Reads ``kwargs['path']`` (a directory); other keys such as ``lan``
+        are ignored. Each non-blank line must be a JSON object providing
+        ``q_main``, ``options`` and ``major``. Rows carry ``question``,
+        ``subject`` and ``reference`` (the whole source record).
+        """
+        path = kwargs.get('path', None)
+        # Sorted so the chosen file does not depend on os.listdir() order.
+        # TODO only work for a single split.
+        f_path = os.path.join(path, sorted(os.listdir(path))[0])
+        with open(f_path, 'r', encoding='utf-8') as f:
+            # Skip blank lines (e.g. a trailing newline) instead of crashing.
+            objs = [json.loads(line) for line in f if line.strip()]
+        out_dict_list = [
+            dict(question=dict(q_main=obj['q_main'], options=obj['options']),
+                 subject=obj['major'],
+                 reference=obj) for obj in objs
+        ]
+        return Dataset.from_list(out_dict_list)
+
+
+class HuMatchingFIBEvaluator(BaseEvaluator):
+    """Score matching fill-in-the-blank answers per blank and per question.
+
+    ref: opencompass.openicl.icl_evaluator.AccwithDetailsEvaluator
+    """
+
+    @staticmethod
+    def _parse_model_ans(pred, idx):
+        """Extract the ``std_ans`` list from a raw model response.
+
+        Args:
+            pred: Model output, expected to contain a flat JSON object.
+            idx: Sample index, only used in the parse-failure message.
+
+        Returns:
+            The list stored under ``std_ans``; ``[]`` when that key is missing
+            or holds a non-list value; ``None`` when no ``{...}`` span is found
+            or the span is not valid JSON.
+        """
+        match = re.search(r'\{.*?\}', pred, re.DOTALL)
+        if not match:
+            return None
+        json_str = match.group(0).strip().replace('\\xa0', '')
+        try:
+            data = json.loads(json_str)
+        except json.JSONDecodeError:
+            print(f'Invalid JSON format. {idx}')
+            return None
+        model_ans = data.get('std_ans', [])
+        # A string/null/object payload used to crash the comparison loop (or
+        # be compared character by character); treat it as "no answers".
+        return model_ans if isinstance(model_ans, list) else []
+
+    def score(self, predictions, references, origin_prompt) -> dict:
+        """Compute blank-level and question-level accuracy.
+
+        Args:
+            predictions: Raw model outputs, one string per sample.
+            references: Records whose ``std_ans`` entry lists the expected
+                answer for each blank, in order.
+            origin_prompt: Prompts sent to the model, aligned with the above.
+
+        Returns:
+            ``{'error': ...}`` if predictions and references differ in length;
+            otherwise both accuracies as percentages rounded to two decimals
+            (``0.0`` when there is nothing to score) plus per-sample details.
+        """
+        if len(predictions) != len(references):
+            return {'error': 'preds and refers have different length.'}
+        details = {}
+        blank_correct, blank_total = 0, 0
+        question_correct, question_total = 0, 0
+
+        for idx, (pred, refer, prompt) in enumerate(
+                zip(predictions, references, origin_prompt)):
+            std_ans = refer['std_ans']
+            pred = pred.strip()
+            model_ans = self._parse_model_ans(pred, idx)
+            # Every sample counts towards the totals, parsed or not.
+            blank_total += len(std_ans)
+            question_total += 1
+            if model_ans is None:
+                model_ans = []
+            else:
+                # zip() stops at the shorter list: unanswered blanks are misses.
+                hits = sum(a == b for a, b in zip(std_ans, model_ans))
+                blank_correct += hits
+                question_correct += 1 if hits == len(std_ans) else 0
+            details[idx] = {
+                'detail': refer,
+                'model_ans': model_ans,
+                'prompt': prompt,
+                'raw_pred': pred,
+            }
+        results = {
+            'blank_level_correctness':
+            round(blank_correct / blank_total * 100, 2) if blank_total else 0.0,
+            'question_level_correctness':
+            round(question_correct / question_total * 100, 2)
+            if question_total else 0.0,
+            'details': details,
+        }
+
+        return results
diff --git a/opencompass/datasets/OpenHuEval/__init__.py b/opencompass/datasets/OpenHuEval/__init__.py
new file mode 100644
index 00000000..d70ebfca
--- /dev/null
+++ b/opencompass/datasets/OpenHuEval/__init__.py
@@ -0,0 +1 @@
+from .HuMatchingFIB import *  # noqa: F401, F403
diff --git a/opencompass/datasets/__init__.py b/opencompass/datasets/__init__.py
index e061286f..d81ee4d8 100644
--- a/opencompass/datasets/__init__.py
+++ b/opencompass/datasets/__init__.py
@@ -145,3 +145,4 @@ from .xcopa import *  # noqa: F401, F403
 from .xiezhi import XiezhiDataset, XiezhiRetriever  # noqa: F401, F403
 from .xlsum import *  # noqa: F401, F403
 from .xsum import *  # noqa: F401, F403
+from .OpenHuEval import * # noqa: F401, F403
\ No newline at end of file