diff --git a/examples/eval_OpenHuEval_HuMatchingFIB.py b/examples/eval_OpenHuEval_HuMatchingFIB.py
index ab48c4da..b5c3347f 100644
--- a/examples/eval_OpenHuEval_HuMatchingFIB.py
+++ b/examples/eval_OpenHuEval_HuMatchingFIB.py
@@ -4,16 +4,17 @@ with read_base():
     from opencompass.configs.datasets.OpenHuEval.HuMatchingFIB.HuMatchingFIB import hu_matching_fib_datasets
 
     # from opencompass.configs.models.openai.gpt_4o_mini_20240718 import models as gpt_4o_mini_20240718_model
-    from opencompass.configs.models.deepseek.deepseek_v3_api_siliconflow import models as deepseek_v3_api_siliconflow_model
+    # from opencompass.configs.models.deepseek.deepseek_v3_api_siliconflow import models as deepseek_v3_api_siliconflow_model
     # from opencompass.configs.models.deepseek.deepseek_v3_api import models as deepseek_v3_api_model
 
     # from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_7b_instruct import models as lmdeploy_qwen2_5_7b_instruct_model
     # from opencompass.configs.models.hf_internlm.lmdeploy_internlm3_8b_instruct import models as lmdeploy_internlm3_8b_instruct_model
     # from opencompass.configs.models.qwq.lmdeploy_qwq_32b_preview import models as lmdeploy_qwq_32b_preview_model
 
-    from opencompass.configs.models.deepseek.deepseek_r1_siliconflow import models as deepseek_r1_siliconflow_model
+    # from opencompass.configs.models.deepseek.deepseek_r1_siliconflow import models as deepseek_r1_siliconflow_model
     # from opencompass.configs.models.openai.o1_mini_2024_09_12 import models as o1_mini_2024_09_12_model
     # from opencompass.configs.models.openai.o3_mini_2025_01_31 import models as o3_mini_2025_01_31_model
+    from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_7b_chat import models as lmdeploy_internlm2_5_7b_chat_model
 
 datasets = hu_matching_fib_datasets
 models = sum([v for k, v in locals().items() if k.endswith('_model')], [])
diff --git a/opencompass/configs/datasets/OpenHuEval/HuMatchingFIB/HuMatchingFIB_setting.py b/opencompass/configs/datasets/OpenHuEval/HuMatchingFIB/HuMatchingFIB_setting.py
index 481cc85c..39b2508d 100644
--- a/opencompass/configs/datasets/OpenHuEval/HuMatchingFIB/HuMatchingFIB_setting.py
+++ b/opencompass/configs/datasets/OpenHuEval/HuMatchingFIB/HuMatchingFIB_setting.py
@@ -34,6 +34,6 @@ The question and options are:
     'description': 'Version 2, using 1shot, more incontext, "#0#" as place holder, output in JSON format'
 }
 
-OpenHuEval_Path = '/mnt/hwfile/opendatalab/wj/proj/polyglot_24July/OpenHuEval'
-DATA_VERSION = '250126'
+OpenHuEval_Path = '/mnt/hwfile/opendatalab/weixingjian/OpenHuEval'
+DATA_VERSION = '250205'
 DATA_PATH = f'{OpenHuEval_Path}/data/HuMatchingFIB/HuMatchingFIB_{DATA_VERSION}/HuMatchingFIB.jsonl'
diff --git a/opencompass/datasets/OpenHuEval/HuMatchingFIB.py b/opencompass/datasets/OpenHuEval/HuMatchingFIB.py
index c8bf189d..f6ca6ad5 100644
--- a/opencompass/datasets/OpenHuEval/HuMatchingFIB.py
+++ b/opencompass/datasets/OpenHuEval/HuMatchingFIB.py
@@ -61,10 +61,13 @@ class HuMatchingFIBEvaluator(BaseEvaluator):
                 blank_total += len(std_ans)
                 question_total += 1
                 details[idx] = {
-                    'detail': refer,
+                    'reference': refer,
+                    'std_ans': std_ans,
                     'model_ans': model_ans,
                     'prompt': prompt,
                     'raw_pred': pred,
+                    'blank_wise_correct': [False] * len(std_ans),
+                    'question_wise_correct': False,
                 }
                 continue
             json_str = json_str.strip()
@@ -89,27 +92,34 @@
             question_total += 1
 
             model_ans = []
+            blank_wise_correct = []
+            is_question_correct = True
             if to_end_flag:
                 model_ans = data.get('answer', [])
-                is_question_correct = True
                 for index, ans in enumerate(std_ans):
                     if index >= len(model_ans):
                         is_question_correct = False
-                        break
+                        blank_wise_correct.append(False)
+                        continue
                     if ans == model_ans[index]:
                         blank_correct += 1
+                        blank_wise_correct.append(True)
                     else:
                         is_question_correct = False
+                        blank_wise_correct.append(False)
                 blank_total += len(std_ans)
                 question_total += 1
                 question_correct += 1 if is_question_correct else 0
 
             details[idx] = {
-                'detail': refer,
+                'reference': refer,
+                'std_ans': std_ans,
                 'model_ans': model_ans,
                 'prompt': prompt,
                 'raw_pred': pred,
+                'blank_wise_correct': blank_wise_correct,
+                'question_wise_correct': is_question_correct,
             }
 
         results = {
             'blank_level_correctness':
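The substantive change in `HuMatchingFIBEvaluator` above is that scoring no longer `break`s out of the loop at the first missing blank: each blank now gets its own verdict in `blank_wise_correct`, and the question-level verdict is true only if every blank matches. Below is a minimal sketch of that per-blank scoring, assuming the list-valued `std_ans`/`model_ans` shown in the diff; `score_question` is a hypothetical standalone wrapper for illustration, while the evaluator itself runs this logic inline for each prediction.

```python
def score_question(std_ans: list, model_ans: list) -> dict:
    """Sketch of the per-blank scoring the patch introduces (names follow the diff)."""
    blank_wise_correct = []
    is_question_correct = True
    for index, ans in enumerate(std_ans):
        if index >= len(model_ans):
            # The model emitted fewer answers than there are blanks:
            # mark this blank wrong and keep going (the old code broke out
            # of the loop here, leaving later blanks unscored).
            is_question_correct = False
            blank_wise_correct.append(False)
            continue
        if ans == model_ans[index]:
            blank_wise_correct.append(True)
        else:
            is_question_correct = False
            blank_wise_correct.append(False)
    return {
        'blank_wise_correct': blank_wise_correct,
        'question_wise_correct': is_question_correct,
        'blank_correct': sum(blank_wise_correct),
        'blank_total': len(std_ans),
    }

# Hypothetical example: three blanks, the model answers the first two
# correctly and omits the third.
print(score_question(['A', 'C', 'B'], ['A', 'C']))
# {'blank_wise_correct': [True, True, False], 'question_wise_correct': False,
#  'blank_correct': 2, 'blank_total': 3}
```

One consequence of `continue` replacing `break` is that blank-level accuracy now grants partial credit when the model returns a truncated answer list, and the new `blank_wise_correct` / `question_wise_correct` fields in `details` make both granularities inspectable per question.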