mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Feature] Add FinanceIQ dataset (#596)
This commit is contained in:
parent
8160cb84e3
commit
c0acd06b05
4
configs/datasets/FinanceIQ/FinanceIQ_gen.py
Normal file
4
configs/datasets/FinanceIQ/FinanceIQ_gen.py
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
from mmengine.config import read_base
|
||||||
|
|
||||||
|
with read_base():
    # Re-export the dataset list from the hashed config variant. That module
    # names its list ``financeIQ_datasets`` (lowercase "f"), so the import
    # must use the same spelling to resolve.
    from .FinanceIQ_gen_e0e6b5 import financeIQ_datasets  # noqa: F401, F403
|
77
configs/datasets/FinanceIQ/FinanceIQ_gen_e0e6b5.py
Normal file
77
configs/datasets/FinanceIQ/FinanceIQ_gen_e0e6b5.py
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
|
from opencompass.openicl.icl_retriever import FixKRetriever
|
||||||
|
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||||
|
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||||
|
from opencompass.datasets import FinanceIQDataset
|
||||||
|
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||||
|
|
||||||
|
# English identifier -> Chinese subject title. Nothing below reads this table;
# the dataset entries are keyed by the Chinese titles.
financeIQ_subject_mapping_en = dict(
    certified_public_accountant='注册会计师(CPA)',
    banking_qualification='银行从业资格',
    securities_qualification='证券从业资格',
    fund_qualification='基金从业资格',
    insurance_qualification='保险从业资格CICE',
    economic_analyst='经济师',
    taxation_practitioner='税务师',
    futures_qualification='期货从业资格',
    certified_fin_planner='理财规划师',
    actuary_fin_math='精算师-金融数学',
)

# Subject name passed to the dataset loader -> subject title shown in the
# prompt. Both sides are currently the same Chinese title.
financeIQ_subject_mapping = {
    '注册会计师(CPA)': '注册会计师(CPA)',
    '银行从业资格': '银行从业资格',
    '证券从业资格': '证券从业资格',
    '基金从业资格': '基金从业资格',
    '保险从业资格CICE': '保险从业资格CICE',
    '经济师': '经济师',
    '税务师': '税务师',
    '期货从业资格': '期货从业资格',
    '理财规划师': '理财规划师',
    '精算师-金融数学': '精算师-金融数学',
}

financeIQ_all_sets = list(financeIQ_subject_mapping)

# One generation-style dataset config is appended per subject.
financeIQ_datasets = []
for _name, _ch_name in financeIQ_subject_mapping.items():
    # Doubled braces survive the f-string as ``{question}``, ``{A}``, ... so
    # only the subject title is substituted here.
    _question = f"以下是关于{_ch_name}的单项选择题,请直接给出正确答案的选项。\n题目:{{question}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}"
    _rounds = [
        {'role': "HUMAN", 'prompt': _question},
        {'role': "BOT", 'prompt': '答案是: {answer}'},
    ]

    financeIQ_infer_cfg = {
        'ice_template': {
            'type': PromptTemplate,
            'template': {'begin': "</E>", 'round': _rounds},
            'ice_token': "</E>",
        },
        # Fixed example ids 0-4 — presumably rows of the "dev" split named as
        # train_split below; confirm against FixKRetriever.
        'retriever': {'type': FixKRetriever, 'fix_id_list': [0, 1, 2, 3, 4]},
        'inferencer': {'type': GenInferencer},
    }

    financeIQ_eval_cfg = {
        'evaluator': {'type': AccEvaluator},
        'pred_postprocessor': {'type': first_capital_postprocess},
    }

    _reader_cfg = {
        'input_columns': ["question", "A", "B", "C", "D"],
        'output_column': "answer",
        'train_split': "dev",
        'test_split': 'test',
    }

    financeIQ_datasets.append({
        'type': FinanceIQDataset,
        'path': "./data/FinanceIQ/",
        'name': _name,
        'abbr': f"FinanceIQ-{_name}",
        'reader_cfg': _reader_cfg,
        'infer_cfg': financeIQ_infer_cfg,
        'eval_cfg': financeIQ_eval_cfg,
    })

# Drop the loop temporaries so they do not remain in the module namespace.
del _name, _ch_name, _question, _rounds, _reader_cfg
|
4
configs/datasets/FinanceIQ/FinanceIQ_ppl.py
Normal file
4
configs/datasets/FinanceIQ/FinanceIQ_ppl.py
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
from mmengine.config import read_base
|
||||||
|
|
||||||
|
with read_base():
    # Re-export the dataset list from the hashed config variant. That module
    # names its list ``financeIQ_datasets`` (lowercase "f"), so the import
    # must use the same spelling to resolve.
    from .FinanceIQ_ppl_42b9bd import financeIQ_datasets  # noqa: F401, F403
|
76
configs/datasets/FinanceIQ/FinanceIQ_ppl_42b9bd.py
Normal file
76
configs/datasets/FinanceIQ/FinanceIQ_ppl_42b9bd.py
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
|
from opencompass.openicl.icl_retriever import FixKRetriever
|
||||||
|
from opencompass.openicl.icl_inferencer import PPLInferencer
|
||||||
|
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||||
|
from opencompass.datasets import FinanceIQDataset
|
||||||
|
|
||||||
|
# English identifier -> Chinese subject title. Nothing below reads this table;
# the dataset entries are keyed by the Chinese titles.
financeIQ_subject_mapping_en = dict(
    certified_public_accountant='注册会计师(CPA)',
    banking_qualification='银行从业资格',
    securities_qualification='证券从业资格',
    fund_qualification='基金从业资格',
    insurance_qualification='保险从业资格CICE',
    economic_analyst='经济师',
    taxation_practitioner='税务师',
    futures_qualification='期货从业资格',
    certified_fin_planner='理财规划师',
    actuary_fin_math='精算师-金融数学',
)

# Subject name passed to the dataset loader -> subject title shown in the
# prompt. Both sides are currently the same Chinese title.
financeIQ_subject_mapping = {
    '注册会计师(CPA)': '注册会计师(CPA)',
    '银行从业资格': '银行从业资格',
    '证券从业资格': '证券从业资格',
    '基金从业资格': '基金从业资格',
    '保险从业资格CICE': '保险从业资格CICE',
    '经济师': '经济师',
    '税务师': '税务师',
    '期货从业资格': '期货从业资格',
    '理财规划师': '理财规划师',
    '精算师-金融数学': '精算师-金融数学',
}

financeIQ_all_sets = list(financeIQ_subject_mapping)

# One perplexity-style dataset config is appended per subject.
financeIQ_datasets = []
for _name, _ch_name in financeIQ_subject_mapping.items():
    # One complete dialogue template per candidate answer letter; the BOT turn
    # has the letter baked in, the HUMAN turn is the same for all four.
    _templates = {}
    for _option in ["A", "B", "C", "D"]:
        _templates[_option] = {
            'begin': "</E>",
            'round': [
                {
                    'role': "HUMAN",
                    # Doubled braces survive the f-string as ``{question}``,
                    # ``{A}``, ...; only the subject title is substituted.
                    'prompt': f"以下是关于{_ch_name}的单项选择题,请直接给出正确答案的选项。\n题目:{{question}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}",
                },
                {'role': "BOT", 'prompt': f'答案是: {_option}'},
            ],
        }

    financeIQ_infer_cfg = {
        'ice_template': {
            'type': PromptTemplate,
            'template': _templates,
            'ice_token': "</E>",
        },
        # Fixed example ids 0-4 — presumably rows of the "dev" split named as
        # train_split below; confirm against FixKRetriever.
        'retriever': {'type': FixKRetriever, 'fix_id_list': [0, 1, 2, 3, 4]},
        'inferencer': {'type': PPLInferencer},
    }

    financeIQ_eval_cfg = {'evaluator': {'type': AccEvaluator}}

    _reader_cfg = {
        'input_columns': ["question", "A", "B", "C", "D"],
        'output_column': "answer",
        'train_split': "dev",
        'test_split': 'test',
    }

    financeIQ_datasets.append({
        'type': FinanceIQDataset,
        'path': "./data/FinanceIQ/",
        'name': _name,
        'abbr': f"FinanceIQ-{_name}",
        'reader_cfg': _reader_cfg,
        'infer_cfg': financeIQ_infer_cfg,
        'eval_cfg': financeIQ_eval_cfg,
    })

# Drop the loop temporaries so they do not remain in the module namespace.
del _name, _ch_name, _option, _templates, _reader_cfg
|
39
opencompass/datasets/FinanceIQ.py
Normal file
39
opencompass/datasets/FinanceIQ.py
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
import csv
|
||||||
|
import os.path as osp
|
||||||
|
|
||||||
|
from datasets import Dataset, DatasetDict
|
||||||
|
|
||||||
|
from opencompass.registry import LOAD_DATASET
|
||||||
|
|
||||||
|
from .base import BaseDataset
|
||||||
|
|
||||||
|
|
||||||
|
@LOAD_DATASET.register_module()
class FinanceIQDataset(BaseDataset):
    """Loader that reads one FinanceIQ subject from per-split CSV files."""

    @staticmethod
    def load(path: str, name: str) -> DatasetDict:
        """Read the ``dev`` and ``test`` CSV files of one subject.

        For each split the file ``<path>/<split>/<name>.csv`` is opened as
        UTF-8. Its first row is skipped as a header, and every remaining row
        must contain exactly seven columns. Columns 1 to 6 are stored as
        ``question``, ``A``, ``B``, ``C``, ``D`` and ``answer``; column 0 is
        not used.

        Args:
            path: Root directory containing the ``dev`` and ``test``
                sub-directories.
            name: Subject name, i.e. the CSV file name without ``.csv``.

        Returns:
            A ``DatasetDict`` with the keys ``'dev'`` and ``'test'``.

        Raises:
            ValueError: If a data row does not have exactly seven columns.
            StopIteration: If a file is completely empty (no header row).
        """
        dataset = DatasetDict()
        for split in ['dev', 'test']:
            filename = osp.join(path, split, f'{name}.csv')
            raw_data = []
            with open(filename, encoding='utf-8') as f:
                reader = csv.reader(f)
                next(reader)  # skip the header
                for row in reader:
                    # Explicit check instead of ``assert`` so malformed rows
                    # are still rejected when Python runs with ``-O``.
                    if len(row) != 7:
                        raise ValueError(
                            f'{filename}, line {reader.line_num}: expected '
                            f'7 columns, got {len(row)}')
                    raw_data.append({
                        'question': row[1],
                        'A': row[2],
                        'B': row[3],
                        'C': row[4],
                        'D': row[5],
                        'answer': row[6],
                    })
            dataset[split] = Dataset.from_list(raw_data)
        return dataset
|
@ -30,6 +30,7 @@ from .drop import * # noqa: F401, F403
|
|||||||
from .ds1000 import * # noqa: F401, F403
|
from .ds1000 import * # noqa: F401, F403
|
||||||
from .ds1000_interpreter import * # noqa: F401, F403
|
from .ds1000_interpreter import * # noqa: F401, F403
|
||||||
from .eprstmt import * # noqa: F401, F403
|
from .eprstmt import * # noqa: F401, F403
|
||||||
|
from .FinanceIQ import * # noqa: F401, F403
|
||||||
from .flores import * # noqa: F401, F403
|
from .flores import * # noqa: F401, F403
|
||||||
from .game24 import * # noqa: F401, F403
|
from .game24 import * # noqa: F401, F403
|
||||||
from .GaokaoBench import * # noqa: F401, F403
|
from .GaokaoBench import * # noqa: F401, F403
|
||||||
|
Loading…
Reference in New Issue
Block a user