OpenCompass/configs/summarizers/lveval.py
yuantao2108 bbec7d8733
[Feature] add lveval benchmark (#914)
* add lveval benchmark

* add LVEval readme file

* update LVEval readme file

* Update configs/eval_bluelm_32k_lveval.py

* Update configs/eval_llama2_7b_lveval.py

---------

Co-authored-by: yuantao <yuantao@infini-ai.com>
Co-authored-by: Mo Li <82895469+DseidLi@users.noreply.github.com>
2024-03-04 11:22:03 +08:00

115 lines
4.5 KiB
Python

from mmengine.config import read_base

# The import must live inside the `with read_base():` body. The summarizer
# defined below scans the module namespace for names ending in
# "_summary_groups", so this import is what supplies its summary groups.
with read_base():
    from .groups.lveval import lveval_summary_groups
# Summarizer configuration for the LVEval benchmark.
#
# "dataset_abbrs" is the ordered list of row labels. Entries made only of
# dashes and the "--------- <title> ---------" strings are section banners
# (presumably emitted verbatim by the summarizer -- confirm against the
# OpenCompass summarizer implementation).
#
# "summary_groups" concatenates every module-level list whose name ends in
# "_summary_groups".
summarizer = {
    "dataset_abbrs": [
        # Overall score.
        "----------------------------------------",
        "--------- LVEval All ---------",  # category
        "----------------------------------------",
        "LVEval_qa",
        # Per-task aggregates.
        "----------------------------------------",
        "--------- LVEval Tasks All ---------",  # category
        "----------------------------------------",
        "LVEval_single_hop_qa",
        "LVEval_single_hop_cqa",
        "LVEval_multi_hop_qa",
        "LVEval_multi_hop_cqa",
        "LVEval_factrecall_cqa",
        # Per-dataset aggregates.
        "----------------------------------------",
        "--------- LVEval Datasets All ---------",  # category
        "----------------------------------------",
        "LVEval_loogle_SD_mixup",
        "LVEval_cmrc_mixup",
        "LVEval_multifieldqa_en_mixup",
        "LVEval_multifieldqa_zh_mixup",
        "LVEval_dureader_mixup",
        "LVEval_loogle_CR_mixup",
        "LVEval_loogle_MIR_mixup",
        "LVEval_hotpotwikiqa_mixup",
        "LVEval_lic_mixup",
        "LVEval_factrecall_en",
        "LVEval_factrecall_zh",
        # Single-hop QA, broken down by length suffix (16k ... 256k).
        "----------------------------------------",
        "--------- LVEval Single_Hop QA ---------",  # category
        "----------------------------------------",
        "LVEval_loogle_SD_mixup_16k",
        "LVEval_loogle_SD_mixup_32k",
        "LVEval_loogle_SD_mixup_64k",
        "LVEval_loogle_SD_mixup_128k",
        "LVEval_loogle_SD_mixup_256k",
        "----------------------------------------",
        "LVEval_cmrc_mixup_16k",
        "LVEval_cmrc_mixup_32k",
        "LVEval_cmrc_mixup_64k",
        "LVEval_cmrc_mixup_128k",
        "LVEval_cmrc_mixup_256k",
        # Single-hop CQA, broken down by length suffix.
        "----------------------------------------",
        "--------- LVEval Single_Hop CQA ---------",  # category
        "----------------------------------------",
        "LVEval_multifieldqa_en_mixup_16k",
        "LVEval_multifieldqa_en_mixup_32k",
        "LVEval_multifieldqa_en_mixup_64k",
        "LVEval_multifieldqa_en_mixup_128k",
        "LVEval_multifieldqa_en_mixup_256k",
        "----------------------------------------",
        "LVEval_multifieldqa_zh_mixup_16k",
        "LVEval_multifieldqa_zh_mixup_32k",
        "LVEval_multifieldqa_zh_mixup_64k",
        "LVEval_multifieldqa_zh_mixup_128k",
        "LVEval_multifieldqa_zh_mixup_256k",
        # Multi-hop QA, broken down by length suffix.
        "----------------------------------------",
        "--------- LVEval Multi_Hop QA ---------",  # category
        "----------------------------------------",
        "LVEval_dureader_mixup_16k",
        "LVEval_dureader_mixup_32k",
        "LVEval_dureader_mixup_64k",
        "LVEval_dureader_mixup_128k",
        "LVEval_dureader_mixup_256k",
        "----------------------------------------",
        "LVEval_loogle_CR_mixup_16k",
        "LVEval_loogle_CR_mixup_32k",
        "LVEval_loogle_CR_mixup_64k",
        "LVEval_loogle_CR_mixup_128k",
        "LVEval_loogle_CR_mixup_256k",
        "----------------------------------------",
        "LVEval_loogle_MIR_mixup_16k",
        "LVEval_loogle_MIR_mixup_32k",
        "LVEval_loogle_MIR_mixup_64k",
        "LVEval_loogle_MIR_mixup_128k",
        "LVEval_loogle_MIR_mixup_256k",
        # Multi-hop CQA, broken down by length suffix.
        "----------------------------------------",
        "--------- LVEval Multi_Hop CQA ---------",  # category
        "----------------------------------------",
        "LVEval_hotpotwikiqa_mixup_16k",
        "LVEval_hotpotwikiqa_mixup_32k",
        "LVEval_hotpotwikiqa_mixup_64k",
        "LVEval_hotpotwikiqa_mixup_128k",
        "LVEval_hotpotwikiqa_mixup_256k",
        "----------------------------------------",
        "LVEval_lic_mixup_16k",
        "LVEval_lic_mixup_32k",
        "LVEval_lic_mixup_64k",
        "LVEval_lic_mixup_128k",
        "LVEval_lic_mixup_256k",
        # Fact-recall CQA, broken down by length suffix.
        "----------------------------------------",
        "--------- LVEval Factrecall CQA ---------",  # category
        "----------------------------------------",
        "LVEval_factrecall_en_16k",
        "LVEval_factrecall_en_32k",
        "LVEval_factrecall_en_64k",
        "LVEval_factrecall_en_128k",
        "LVEval_factrecall_en_256k",
        "----------------------------------------",
        "LVEval_factrecall_zh_16k",
        "LVEval_factrecall_zh_32k",
        "LVEval_factrecall_zh_64k",
        "LVEval_factrecall_zh_128k",
        "LVEval_factrecall_zh_256k",
    ],
    # At module level locals() is the module namespace; each matching value
    # is list-concatenated onto the empty start list.
    "summary_groups": sum(
        [
            groups
            for name, groups in locals().items()
            if name.endswith("_summary_groups")
        ],
        [],
    ),
}