From 2abf6ca795ae4e47750537a87701de7cdcf0de5b Mon Sep 17 00:00:00 2001
From: wujiang
Date: Sun, 2 Feb 2025 14:46:39 +0800
Subject: [PATCH] update HuStandardFIB

---
 examples/eval_OpenHuEval_HuStandardFIB.py     | 13 ++++
 .../OpenHuEval/HuStandardFIB/HuStandardFIB.py | 68 +++++++++----------
 .../HuStandardFIB/HuStandardFIB_setting.py    | 38 +++++++----
 .../datasets/OpenHuEval/HuStandardFIB.py      | 32 +++++----
 4 files changed, 86 insertions(+), 65 deletions(-)
 create mode 100644 examples/eval_OpenHuEval_HuStandardFIB.py

diff --git a/examples/eval_OpenHuEval_HuStandardFIB.py b/examples/eval_OpenHuEval_HuStandardFIB.py
new file mode 100644
index 00000000..592739fe
--- /dev/null
+++ b/examples/eval_OpenHuEval_HuStandardFIB.py
@@ -0,0 +1,13 @@
+from mmengine.config import read_base
+
+with read_base():
+    from opencompass.configs.datasets.OpenHuEval.HuStandardFIB.HuStandardFIB import hu_standard_fib_datasets
+
+    from opencompass.configs.models.openai.gpt_4o_mini_20240718 import models as gpt_4o_mini_20240718_model
+    from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_7b_instruct import models as lmdeploy_qwen2_5_7b_instruct_model
+    from opencompass.configs.models.hf_internlm.lmdeploy_internlm3_8b_instruct import models as lmdeploy_internlm3_8b_instruct_model
+    # from opencompass.configs.models.qwq.lmdeploy_qwq_32b_preview import models as lmdeploy_qwq_32b_preview_model
+
+datasets = hu_standard_fib_datasets
+models = sum([v for k, v in locals().items() if k.endswith('_model')], [])
+work_dir = './outputs/' + __file__.split('/')[-1].split('.')[0] + '/'  # do NOT modify this line, yapf: disable, pylint: disable

diff --git a/opencompass/configs/datasets/OpenHuEval/HuStandardFIB/HuStandardFIB.py b/opencompass/configs/datasets/OpenHuEval/HuStandardFIB/HuStandardFIB.py
index 7c1fdc6e..84920fe5 100644
--- a/opencompass/configs/datasets/OpenHuEval/HuStandardFIB/HuStandardFIB.py
+++ b/opencompass/configs/datasets/OpenHuEval/HuStandardFIB/HuStandardFIB.py
@@ -3,48 +3,42 @@ from mmengine.config import read_base
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets.OpenHuEval.HuStandardFIB import HuStandardFIBDataset, HuStandardFIBEvaluator
+from opencompass.datasets.OpenHuEval.HuStandardFIB import HuStandardFIBDataset, HuStandardFIBEvaluator

 with read_base():
-    from .HuStandardFIB_setting import INSTRUCTIONS, DATASET_PATH
+    from .HuStandardFIB_setting import INSTRUCTION, DATA_PATH, DATA_VERSION

-ALL_LANGUAGES = ['hu']
-PROMPT_VERSION = INSTRUCTIONS['version']
+instruction = INSTRUCTION['prompt_template']
+prompt_version = INSTRUCTION['version']

-FIB2_reader_cfg = dict(input_columns=['question', 'subject'],
-                       output_column='reference')
+hu_standard_fib_reader_cfg = dict(input_columns=['instruction', 'questions', 'hu_specific_dim'],
+                                  output_column='reference')

-FIB2_datasets = []
-for lan in ALL_LANGUAGES:
-    instruction = INSTRUCTIONS[lan]
-    FIB2_infer_cfg = dict(
-        prompt_template=dict(
-            type=PromptTemplate,
-            template=dict(
-                begin='',
-                round=[
-                    dict(
-                        role='HUMAN',
-                        prompt=instruction
-                    ),
-                ],
-            ),
-            ice_token='',
+hu_standard_fib_datasets = []
+
+hu_standard_fib_infer_cfg = dict(
+    prompt_template=dict(
+        type=PromptTemplate,
+        template=dict(
+            begin='',
+            round=[
+                dict(role='HUMAN', prompt=instruction),
+            ],
         ),
-        retriever=dict(type=ZeroRetriever),
-        inferencer=dict(type=GenInferencer),
-    )
+        ice_token='',
+    ),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer),
+)

-    FIB2_eval_cfg = dict(evaluator=dict(type=HuStandardFIBEvaluator))
+hu_standard_fib_eval_cfg = dict(evaluator=dict(type=HuStandardFIBEvaluator))

-    FIB2_datasets.append(
-        dict(
-            abbr=f'HuStandardFIB-{lan}-1shot-{PROMPT_VERSION}',
-            type=HuStandardFIBDataset,
-            path=DATASET_PATH,
-            lan=lan,
-            reader_cfg=FIB2_reader_cfg,
-            infer_cfg=FIB2_infer_cfg,
-            eval_cfg=FIB2_eval_cfg,
-        )
-    )
+hu_standard_fib_datasets.append(
+    dict(
+        abbr=f'hu_standard_fib_{DATA_VERSION}-prompt_{prompt_version}',
+        type=HuStandardFIBDataset,
+        filepath=DATA_PATH,
+        reader_cfg=hu_standard_fib_reader_cfg,
+        infer_cfg=hu_standard_fib_infer_cfg,
+        eval_cfg=hu_standard_fib_eval_cfg,
+    ))
diff --git a/opencompass/configs/datasets/OpenHuEval/HuStandardFIB/HuStandardFIB_setting.py b/opencompass/configs/datasets/OpenHuEval/HuStandardFIB/HuStandardFIB_setting.py
index 708993e1..69e3f386 100644
--- a/opencompass/configs/datasets/OpenHuEval/HuStandardFIB/HuStandardFIB_setting.py
+++ b/opencompass/configs/datasets/OpenHuEval/HuStandardFIB/HuStandardFIB_setting.py
@@ -1,15 +1,27 @@
-INSTRUCTIONS = {
-    'hu': """The following question is in hungarian language on {subject}, please read the question, and try to fill in the blank in the sub question list. Please organize the answer in a list. An example:
-    {
-    "q_main": "Írd be a megfelelő meghatározás mellé a fogalmat!",
-    "q_sub": ["A.A szerzetesi közösségek szabályzatának elnevezése latinul: #0#", "B.Az első ún. kolduló rend: #1#", "C.A szerzetesek által kézzel másolt mű: #2#", "D.Papi nőtlenség: #3#", "E.A pápát megválasztó egyházi méltóságok: #4#", "F.A bencés rend megújítása ebben a kolostorban kezdődött a 10. században: #5#"],
-    "formatted_std_ans": ["#0#regula", "#1#ferencesrend;ferences", "#2#kódex", "#3#cölibátus", "#4#bíborosok;bíboros", "#5#Cluny"]
-    }
-    Now try to answer the following question, your response should be in a JSON format. Contain the std_ans like the case given above.
-    The question is: {question}.
-    """,
-    'version':'V1',
-    'description': 'Initial version, using 1shot, incontext, #0# as place holder, output in JSON format',
+INSTRUCTION = {
+    'prompt_template':
+    """The following questions are in Hungarian language on {hu_specific_dim}, please read the questions, and try to fill in the blanks in the question list. Please organize the answer in a list. An example:
+{
+    "instruction": "Írd be a megfelelő meghatározás mellé a fogalmat!",
+    "questions": ["A.A szerzetesi közösségek szabályzatának elnevezése latinul: #0#", "B.Az első ún. kolduló rend: #1#", "C.A szerzetesek által kézzel másolt mű: #2#", "D.Papi nőtlenség: #3#", "E.A pápát megválasztó egyházi méltóságok: #4#", "F.A bencés rend megújítása ebben a kolostorban kezdődött a 10. században: #5#"],
+}
+The answers are:
+{
+    "answers": ["#0#regula", "#1#ferencesrend", "#2#kódex", "#3#cölibátus", "#4#bíborosok", "#5#Cluny"]
+}
+Now try to answer the following questions, your response should be in a JSON format. Contain the "answers" like the case given above.
+The questions are:
+{
+    "instruction": {instruction},
+    "questions": {questions},
+}
+""",
+    'version':
+    'V1',
+    'description':
+    'Initial version, using 1shot, incontext, #0# as place holder, output in JSON format',
 }

-DATASET_PATH = "/mnt/hwfile/opendatalab/weixingjian/test/test2/"
\ No newline at end of file
+OpenHuEval_Path = '/mnt/hwfile/opendatalab/wj/proj/polyglot_24July/OpenHuEval'
+DATA_VERSION = '250126'
+DATA_PATH = f'{OpenHuEval_Path}/data/HuStandardFIB/HuStandardFIB_{DATA_VERSION}/HuStandardFIB.jsonl'
diff --git a/opencompass/datasets/OpenHuEval/HuStandardFIB.py b/opencompass/datasets/OpenHuEval/HuStandardFIB.py
index 7db79cf3..62e3d998 100644
--- a/opencompass/datasets/OpenHuEval/HuStandardFIB.py
+++ b/opencompass/datasets/OpenHuEval/HuStandardFIB.py
@@ -4,21 +4,20 @@ import re

 from datasets import Dataset, DatasetDict
 from fuzzywuzzy import fuzz
+
 from opencompass.openicl.icl_evaluator import BaseEvaluator
+
 from ..base import BaseDataset


 class HuStandardFIBDataset(BaseDataset):

     @staticmethod
-    def load(**kwargs):
-        path = kwargs.get('path', None)
-        # lan = kwargs.get('lan', None)
+    def load(filepath):
+        assert os.path.isfile(filepath)
+        assert filepath.endswith('.jsonl')
         dataset = DatasetDict()
-        file_list = [os.path.join(path, file) for file in os.listdir(path)
-                     ]  # TODO only work for a single split.
-        f_path = file_list[0]
-        f = open(f_path, 'r', encoding='utf-8')
+        f = open(filepath, 'r', encoding='utf-8')

         lines = f.readlines()
         objs = []
@@ -29,11 +28,15 @@ class HuStandardFIBDataset(BaseDataset):
         out_dict_list = []

         for obj in objs:
-            question = dict(q_main=obj['q_main'],
-                            q_sub=obj['formatted_q_sub'])  # TODO
-            subject = obj['major']
+            instruction = obj['question']  # TODO: question -> instruction
+            questions = obj[
+                'question_sub']  # TODO: update question_sub -> questions
+            hu_specific_dim = obj['hu_specific_dim']
             tmp = obj
-            new_obj = dict(question=question, subject=subject, reference=tmp)
+            new_obj = dict(instruction=instruction,
+                           questions=questions,
+                           hu_specific_dim=hu_specific_dim,
+                           reference=tmp)
             out_dict_list.append(new_obj)
         dataset = Dataset.from_list(out_dict_list)
         return dataset
@@ -55,9 +58,8 @@ class HuStandardFIBEvaluator(BaseEvaluator):
                 zip(predictions, references, origin_prompt)):
             std_ans = [
                 re.sub(r'#\d+#', '', ans).split(';')
-                for ans in refer['formatted_std_ans']
-            ]  # Remove "#0#" and "#1#", then split
-            # refer['formatted_std_ans']
+                for ans in refer['answer']  # TODO: answer -> answers
+            ]  # Remove "#0#" and "#1#", then split refer['formatted_std_ans']
             model_ans = []
             pred = pred.strip()
             match = re.search(r'\{.*?\}', pred, re.DOTALL)
@@ -99,7 +101,7 @@ class HuStandardFIBEvaluator(BaseEvaluator):
             if to_end_flag:
                 model_ans = [
                     re.sub(r'#\d+#', '', ans).split(';')
-                    for ans in data.get('formatted_std_ans', [])
+                    for ans in data.get('answers', [])
                 ]  # Preprocess model_ans in the same way as std_ans
                 is_question_correct = True
                 for idx, ans_list in enumerate(std_ans):
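
For reference, below is a minimal, self-contained sketch (not part of the patch) of the answer-extraction step that HuStandardFIBEvaluator applies to a model reply, based on the regexes visible in the hunks above: take the first {...} block from the prediction, read its "answers" list, strip the #N# blank markers, and split alternative answers on ';'. The extract_answers helper and the sample string are illustrative names only, and the use of json.loads on the matched block is an assumption, since that part of the evaluator lies outside the diff context.

import json
import re


def extract_answers(pred: str):
    """Illustrative helper: normalise a model reply in the JSON format
    requested by INSTRUCTION['prompt_template']."""
    match = re.search(r'\{.*?\}', pred.strip(), re.DOTALL)  # first {...} block
    if match is None:
        return []
    data = json.loads(match.group(0))  # assumed parsing of the matched block
    # Drop the '#0#'/'#1#' blank markers and split alternatives on ';',
    # mirroring the preprocessing the evaluator applies to the references.
    return [
        re.sub(r'#\d+#', '', ans).split(';')
        for ans in data.get('answers', [])
    ]


print(extract_answers('{"answers": ["#0#regula", "#1#ferencesrend;ferences"]}'))
# -> [['regula'], ['ferencesrend', 'ferences']]

Assuming the standard OpenCompass entry point, the new example config would be launched with: python run.py examples/eval_OpenHuEval_HuStandardFIB.py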