mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00

* add lveval benchmark
* add LVEval readme file
* update LVEval readme file
* Update configs/eval_bluelm_32k_lveval.py
* Update configs/eval_llama2_7b_lveval.py

---------

Co-authored-by: yuantao <yuantao@infini-ai.com>
Co-authored-by: Mo Li <82895469+DseidLi@users.noreply.github.com>
115 lines
4.5 KiB
Python
115 lines
4.5 KiB
Python
# Summarizer configuration for the LVEval benchmark.
#
# Defines a single config variable, `summarizer`, holding:
#   * dataset_abbrs  - the ordered list of row labels to report, and
#   * summary_groups - the concatenation of every `*_summary_groups` list
#                      visible at module scope.
from mmengine.config import read_base

with read_base():
    # Base-config import: brings `lveval_summary_groups` into this module's
    # namespace so the `locals()` scan below can find it.
    from .groups.lveval import lveval_summary_groups

summarizer = dict(
    # Entries made only of dashes, and the "--------- ... ---------" entries
    # marked `# category`, are not dataset names.
    # NOTE(review): presumably they are rendered as separator / heading rows
    # in the summary table - confirm against the summarizer implementation.
    dataset_abbrs=[
        "----------------------------------------",
        "--------- LVEval All ---------",  # category
        "----------------------------------------",
        "LVEval_qa",
        "----------------------------------------",
        "--------- LVEval Tasks All ---------",  # category
        "----------------------------------------",
        "LVEval_single_hop_qa",
        "LVEval_single_hop_cqa",
        "LVEval_multi_hop_qa",
        "LVEval_multi_hop_cqa",
        "LVEval_factrecall_cqa",
        "----------------------------------------",
        "--------- LVEval Datasets All ---------",  # category
        "----------------------------------------",
        "LVEval_loogle_SD_mixup",
        "LVEval_cmrc_mixup",
        "LVEval_multifieldqa_en_mixup",
        "LVEval_multifieldqa_zh_mixup",
        "LVEval_dureader_mixup",
        "LVEval_loogle_CR_mixup",
        "LVEval_loogle_MIR_mixup",
        "LVEval_hotpotwikiqa_mixup",
        "LVEval_lic_mixup",
        "LVEval_factrecall_en",
        "LVEval_factrecall_zh",
        "----------------------------------------",
        "--------- LVEval Single_Hop QA ---------",  # category
        "----------------------------------------",
        "LVEval_loogle_SD_mixup_16k",
        "LVEval_loogle_SD_mixup_32k",
        "LVEval_loogle_SD_mixup_64k",
        "LVEval_loogle_SD_mixup_128k",
        "LVEval_loogle_SD_mixup_256k",
        "----------------------------------------",
        "LVEval_cmrc_mixup_16k",
        "LVEval_cmrc_mixup_32k",
        "LVEval_cmrc_mixup_64k",
        "LVEval_cmrc_mixup_128k",
        "LVEval_cmrc_mixup_256k",
        "----------------------------------------",
        "--------- LVEval Single_Hop CQA ---------",  # category
        "----------------------------------------",
        "LVEval_multifieldqa_en_mixup_16k",
        "LVEval_multifieldqa_en_mixup_32k",
        "LVEval_multifieldqa_en_mixup_64k",
        "LVEval_multifieldqa_en_mixup_128k",
        "LVEval_multifieldqa_en_mixup_256k",
        "----------------------------------------",
        "LVEval_multifieldqa_zh_mixup_16k",
        "LVEval_multifieldqa_zh_mixup_32k",
        "LVEval_multifieldqa_zh_mixup_64k",
        "LVEval_multifieldqa_zh_mixup_128k",
        "LVEval_multifieldqa_zh_mixup_256k",
        "----------------------------------------",
        "--------- LVEval Multi_Hop QA ---------",  # category
        "----------------------------------------",
        "LVEval_dureader_mixup_16k",
        "LVEval_dureader_mixup_32k",
        "LVEval_dureader_mixup_64k",
        "LVEval_dureader_mixup_128k",
        "LVEval_dureader_mixup_256k",
        "----------------------------------------",
        "LVEval_loogle_CR_mixup_16k",
        "LVEval_loogle_CR_mixup_32k",
        "LVEval_loogle_CR_mixup_64k",
        "LVEval_loogle_CR_mixup_128k",
        "LVEval_loogle_CR_mixup_256k",
        "----------------------------------------",
        "LVEval_loogle_MIR_mixup_16k",
        "LVEval_loogle_MIR_mixup_32k",
        "LVEval_loogle_MIR_mixup_64k",
        "LVEval_loogle_MIR_mixup_128k",
        "LVEval_loogle_MIR_mixup_256k",
        "----------------------------------------",
        "--------- LVEval Multi_Hop CQA ---------",  # category
        "----------------------------------------",
        "LVEval_hotpotwikiqa_mixup_16k",
        "LVEval_hotpotwikiqa_mixup_32k",
        "LVEval_hotpotwikiqa_mixup_64k",
        "LVEval_hotpotwikiqa_mixup_128k",
        "LVEval_hotpotwikiqa_mixup_256k",
        "----------------------------------------",
        "LVEval_lic_mixup_16k",
        "LVEval_lic_mixup_32k",
        "LVEval_lic_mixup_64k",
        "LVEval_lic_mixup_128k",
        "LVEval_lic_mixup_256k",
        "----------------------------------------",
        "--------- LVEval Factrecall CQA ---------",  # category
        "----------------------------------------",
        "LVEval_factrecall_en_16k",
        "LVEval_factrecall_en_32k",
        "LVEval_factrecall_en_64k",
        "LVEval_factrecall_en_128k",
        "LVEval_factrecall_en_256k",
        "----------------------------------------",
        "LVEval_factrecall_zh_16k",
        "LVEval_factrecall_zh_32k",
        "LVEval_factrecall_zh_64k",
        "LVEval_factrecall_zh_128k",
        "LVEval_factrecall_zh_256k",
    ],
    # Concatenate every module-level value whose name ends in
    # "_summary_groups"; `sum(list_of_lists, [])` joins them into one flat
    # list. `locals()` is the outermost iterable of the comprehension, so it
    # is evaluated in the enclosing (module) scope rather than inside the
    # comprehension - keep it in that position when editing this expression.
    summary_groups=sum(
        [v for k, v in locals().items() if k.endswith("_summary_groups")], []
    ),
)