mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Feature] Add FinanceIQ dataset (#596)
This commit is contained in:
parent
8160cb84e3
commit
c0acd06b05
4
configs/datasets/FinanceIQ/FinanceIQ_gen.py
Normal file
4
configs/datasets/FinanceIQ/FinanceIQ_gen.py
Normal file
@ -0,0 +1,4 @@
|
||||
from mmengine.config import read_base

with read_base():
    # The base config defines the list as `financeIQ_datasets` (lowercase
    # leading "f"); the imported name has to match that definition exactly.
    from .FinanceIQ_gen_e0e6b5 import financeIQ_datasets  # noqa: F401, F403
|
77
configs/datasets/FinanceIQ/FinanceIQ_gen_e0e6b5.py
Normal file
77
configs/datasets/FinanceIQ/FinanceIQ_gen_e0e6b5.py
Normal file
@ -0,0 +1,77 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import FixKRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import FinanceIQDataset
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
# English identifier -> Chinese subject title. Nothing else in this config
# reads this table.
financeIQ_subject_mapping_en = {
    'certified_public_accountant': '注册会计师(CPA)',
    'banking_qualification': '银行从业资格',
    'securities_qualification': '证券从业资格',
    'fund_qualification': '基金从业资格',
    'insurance_qualification': '保险从业资格CICE',
    'economic_analyst': '经济师',
    'taxation_practitioner': '税务师',
    'futures_qualification': '期货从业资格',
    'certified_fin_planner': '理财规划师',
    'actuary_fin_math': '精算师-金融数学',
}

# Subset name (passed to the dataset loader as `name`) -> subject title that
# is interpolated into the prompt. Currently an identity mapping.
financeIQ_subject_mapping = {
    '注册会计师(CPA)': '注册会计师(CPA)',
    '银行从业资格': '银行从业资格',
    '证券从业资格': '证券从业资格',
    '基金从业资格': '基金从业资格',
    '保险从业资格CICE': '保险从业资格CICE',
    '经济师': '经济师',
    '税务师': '税务师',
    '期货从业资格': '期货从业资格',
    '理财规划师': '理财规划师',
    '精算师-金融数学': '精算师-金融数学',
}

financeIQ_all_sets = [*financeIQ_subject_mapping]

# One generation-style dataset config per subject, using the first five
# entries of the "dev" split as fixed in-context examples.
financeIQ_datasets = []
for _name, _ch_name in financeIQ_subject_mapping.items():
    # Only the subject title is filled in here; the remaining {placeholders}
    # are left for the prompt template to fill per sample.
    _question_prompt = (
        f"以下是关于{_ch_name}的单项选择题,请直接给出正确答案的选项。\n"
        "题目:{question}\n"
        "A. {A}\nB. {B}\nC. {C}\nD. {D}"
    )

    financeIQ_infer_cfg = {
        'ice_template': {
            'type': PromptTemplate,
            'template': {
                'begin': "</E>",
                'round': [
                    {'role': "HUMAN", 'prompt': _question_prompt},
                    {'role': "BOT", 'prompt': '答案是: {answer}'},
                ],
            },
            'ice_token': "</E>",
        },
        'retriever': {'type': FixKRetriever, 'fix_id_list': [0, 1, 2, 3, 4]},
        'inferencer': {'type': GenInferencer},
    }

    # Built inside the loop so every dataset entry owns its own dict.
    financeIQ_eval_cfg = {
        'evaluator': {'type': AccEvaluator},
        'pred_postprocessor': {'type': first_capital_postprocess},
    }

    financeIQ_datasets.append({
        'type': FinanceIQDataset,
        'path': "./data/FinanceIQ/",
        'name': _name,
        'abbr': f"FinanceIQ-{_name}",
        'reader_cfg': {
            'input_columns': ["question", "A", "B", "C", "D"],
            'output_column': "answer",
            'train_split': "dev",
            'test_split': 'test',
        },
        'infer_cfg': financeIQ_infer_cfg,
        'eval_cfg': financeIQ_eval_cfg,
    })

# Drop the loop temporaries so they do not remain as module-level names.
del _name, _ch_name, _question_prompt
|
4
configs/datasets/FinanceIQ/FinanceIQ_ppl.py
Normal file
4
configs/datasets/FinanceIQ/FinanceIQ_ppl.py
Normal file
@ -0,0 +1,4 @@
|
||||
from mmengine.config import read_base

with read_base():
    # The base config defines the list as `financeIQ_datasets` (lowercase
    # leading "f"); the imported name has to match that definition exactly.
    from .FinanceIQ_ppl_42b9bd import financeIQ_datasets  # noqa: F401, F403
|
76
configs/datasets/FinanceIQ/FinanceIQ_ppl_42b9bd.py
Normal file
76
configs/datasets/FinanceIQ/FinanceIQ_ppl_42b9bd.py
Normal file
@ -0,0 +1,76 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import FixKRetriever
|
||||
from opencompass.openicl.icl_inferencer import PPLInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import FinanceIQDataset
|
||||
|
||||
# English identifier -> Chinese subject title. Nothing else in this config
# reads this table.
financeIQ_subject_mapping_en = {
    'certified_public_accountant': '注册会计师(CPA)',
    'banking_qualification': '银行从业资格',
    'securities_qualification': '证券从业资格',
    'fund_qualification': '基金从业资格',
    'insurance_qualification': '保险从业资格CICE',
    'economic_analyst': '经济师',
    'taxation_practitioner': '税务师',
    'futures_qualification': '期货从业资格',
    'certified_fin_planner': '理财规划师',
    'actuary_fin_math': '精算师-金融数学',
}

# Subset name (passed to the dataset loader as `name`) -> subject title that
# is interpolated into the prompt. Currently an identity mapping.
financeIQ_subject_mapping = {
    '注册会计师(CPA)': '注册会计师(CPA)',
    '银行从业资格': '银行从业资格',
    '证券从业资格': '证券从业资格',
    '基金从业资格': '基金从业资格',
    '保险从业资格CICE': '保险从业资格CICE',
    '经济师': '经济师',
    '税务师': '税务师',
    '期货从业资格': '期货从业资格',
    '理财规划师': '理财规划师',
    '精算师-金融数学': '精算师-金融数学',
}

financeIQ_all_sets = [*financeIQ_subject_mapping]

# One perplexity-style dataset config per subject: the template holds one
# complete dialogue per candidate option, keyed by the option letter.
financeIQ_datasets = []
for _name, _ch_name in financeIQ_subject_mapping.items():
    # Only the subject title is filled in here; the remaining {placeholders}
    # are left for the prompt template to fill per sample.
    _question_prompt = (
        f"以下是关于{_ch_name}的单项选择题,请直接给出正确答案的选项。\n"
        "题目:{question}\n"
        "A. {A}\nB. {B}\nC. {C}\nD. {D}"
    )

    financeIQ_infer_cfg = {
        'ice_template': {
            'type': PromptTemplate,
            'template': {
                option: {
                    'begin': "</E>",
                    'round': [
                        {'role': "HUMAN", 'prompt': _question_prompt},
                        {'role': "BOT", 'prompt': f'答案是: {option}'},
                    ],
                }
                for option in "ABCD"
            },
            'ice_token': "</E>",
        },
        'retriever': {'type': FixKRetriever, 'fix_id_list': [0, 1, 2, 3, 4]},
        'inferencer': {'type': PPLInferencer},
    }

    # Built inside the loop so every dataset entry owns its own dict.
    financeIQ_eval_cfg = {'evaluator': {'type': AccEvaluator}}

    financeIQ_datasets.append({
        'type': FinanceIQDataset,
        'path': "./data/FinanceIQ/",
        'name': _name,
        'abbr': f"FinanceIQ-{_name}",
        'reader_cfg': {
            'input_columns': ["question", "A", "B", "C", "D"],
            'output_column': "answer",
            'train_split': "dev",
            'test_split': 'test',
        },
        'infer_cfg': financeIQ_infer_cfg,
        'eval_cfg': financeIQ_eval_cfg,
    })

# Drop the loop temporaries so they do not remain as module-level names.
del _name, _ch_name, _question_prompt
|
39
opencompass/datasets/FinanceIQ.py
Normal file
39
opencompass/datasets/FinanceIQ.py
Normal file
@ -0,0 +1,39 @@
|
||||
import csv
|
||||
import os.path as osp
|
||||
|
||||
from datasets import Dataset, DatasetDict
|
||||
|
||||
from opencompass.registry import LOAD_DATASET
|
||||
|
||||
from .base import BaseDataset
|
||||
|
||||
|
||||
@LOAD_DATASET.register_module()
class FinanceIQDataset(BaseDataset):
    """Loader for the FinanceIQ multiple-choice question CSV files."""

    @staticmethod
    def load(path: str, name: str):
        """Read the ``dev`` and ``test`` CSV files of one FinanceIQ subset.

        Args:
            path (str): Directory containing the ``dev`` and ``test``
                sub-directories.
            name (str): Subset name; ``<path>/<split>/<name>.csv`` is read
                for each split.

        Returns:
            DatasetDict: ``dev`` and ``test`` splits whose rows have the
            fields ``question``, ``A``, ``B``, ``C``, ``D`` and ``answer``.

        Raises:
            ValueError: If a data row does not have exactly 7 columns.
        """
        # Columns 1..6 of every row, in file order. Column 0 is not used
        # (presumably a row index -- confirm against the data files).
        fields = ('question', 'A', 'B', 'C', 'D', 'answer')
        expected_columns = len(fields) + 1

        dataset = DatasetDict()
        for split in ['dev', 'test']:
            raw_data = []
            filename = osp.join(path, split, f'{name}.csv')
            with open(filename, encoding='utf-8') as f:
                reader = csv.reader(f)
                _ = next(reader)  # skip the header
                for row in reader:
                    # Explicit check instead of `assert`: assertions are
                    # removed under `python -O`, which would let malformed
                    # rows through or fail with a bare IndexError.
                    if len(row) != expected_columns:
                        raise ValueError(
                            f'{filename}, line {reader.line_num}: expected '
                            f'{expected_columns} columns, got {len(row)}')
                    raw_data.append(dict(zip(fields, row[1:])))
            dataset[split] = Dataset.from_list(raw_data)
        return dataset
|
@ -30,6 +30,7 @@ from .drop import * # noqa: F401, F403
|
||||
from .ds1000 import * # noqa: F401, F403
|
||||
from .ds1000_interpreter import * # noqa: F401, F403
|
||||
from .eprstmt import * # noqa: F401, F403
|
||||
from .FinanceIQ import * # noqa: F401, F403
|
||||
from .flores import * # noqa: F401, F403
|
||||
from .game24 import * # noqa: F401, F403
|
||||
from .GaokaoBench import * # noqa: F401, F403
|
||||
|
Loading…
Reference in New Issue
Block a user