diff --git a/README.md b/README.md index 887fcb4c..736968eb 100644 --- a/README.md +++ b/README.md @@ -279,263 +279,13 @@ OpenCompass is a one-stop platform for large model evaluation, aiming to provide ## 📖 Dataset Support - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- Language - - Knowledge - - Reasoning - - Examination -
-
-Word Definition +The documentation on the OpenCompass website provides a statistical list of all datasets that can be used on this platform. -- WiC -- SummEdits +You can quickly find the dataset you need in the list by sorting, filtering, and searching. -
+Please refer to the dataset statistics chapter of the [official documentation](https://opencompass.org.cn/doc) for details. -
-Idiom Learning - -- CHID - -
- -
-Semantic Similarity - -- AFQMC -- BUSTM - -
- -
-Coreference Resolution - -- CLUEWSC -- WSC -- WinoGrande - -
- -
-Translation - -- Flores -- IWSLT2017 - -
- -
-Multi-language Question Answering - -- TyDi-QA -- XCOPA - -
- -
-Multi-language Summary - -- XLSum - -
-
-
-Knowledge Question Answering - -- BoolQ -- CommonSenseQA -- NaturalQuestions -- TriviaQA - -
-
-
-Textual Entailment - -- CMNLI -- OCNLI -- OCNLI_FC -- AX-b -- AX-g -- CB -- RTE -- ANLI - -
- -
-Commonsense Reasoning - -- StoryCloze -- COPA -- ReCoRD -- HellaSwag -- PIQA -- SIQA - -
- -
-Mathematical Reasoning - -- MATH -- GSM8K - -
- -
-Theorem Application - -- TheoremQA -- StrategyQA -- SciBench - -
- -
-Comprehensive Reasoning - -- BBH - -
-
-
-Junior High, High School, University, Professional Examinations - -- C-Eval -- AGIEval -- MMLU -- GAOKAO-Bench -- CMMLU -- ARC -- Xiezhi - -
- -
-Medical Examinations - -- CMB - -
-
- Understanding - - Long Context - - Safety - - Code -
-
-Reading Comprehension - -- C3 -- CMRC -- DRCD -- MultiRC -- RACE -- DROP -- OpenBookQA -- SQuAD2.0 - -
- -
-Content Summary - -- CSL -- LCSTS -- XSum -- SummScreen - -
- -
-Content Analysis - -- EPRSTMT -- LAMBADA -- TNEWS - -
-
-
-Long Context Understanding - -- LEval -- LongBench -- GovReports -- NarrativeQA -- Qasper - -
-
-
-Safety - -- CivilComments -- CrowsPairs -- CValues -- JigsawMultilingual -- TruthfulQA - -
-
-Robustness - -- AdvGLUE - -
-
-
-Code - -- HumanEval -- HumanEvalX -- MBPP -- APPs -- DS1000 - -
-
+

🔝Back to top

## 📖 Model Support diff --git a/README_zh-CN.md b/README_zh-CN.md index 5c889956..8d8ecd02 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -274,263 +274,11 @@ OpenCompass 是面向大模型评测的一站式平台。其主要特点如下 ## 📖 数据集支持 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- 语言 - - 知识 - - 推理 - - 考试 -
-
-字词释义 +我们已经在OpenCompass官网的文档中支持了所有可在本平台上使用的数据集的统计列表。 -- WiC -- SummEdits +您可以通过排序、筛选和搜索等功能从列表中快速找到您需要的数据集。 -
- -
-成语习语 - -- CHID - -
- -
-语义相似度 - -- AFQMC -- BUSTM - -
- -
-指代消解 - -- CLUEWSC -- WSC -- WinoGrande - -
- -
-翻译 - -- Flores -- IWSLT2017 - -
- -
-多语种问答 - -- TyDi-QA -- XCOPA - -
- -
-多语种总结 - -- XLSum - -
-
-
-知识问答 - -- BoolQ -- CommonSenseQA -- NaturalQuestions -- TriviaQA - -
-
-
-文本蕴含 - -- CMNLI -- OCNLI -- OCNLI_FC -- AX-b -- AX-g -- CB -- RTE -- ANLI - -
- -
-常识推理 - -- StoryCloze -- COPA -- ReCoRD -- HellaSwag -- PIQA -- SIQA - -
- -
-数学推理 - -- MATH -- GSM8K - -
- -
-定理应用 - -- TheoremQA -- StrategyQA -- SciBench - -
- -
-综合推理 - -- BBH - -
-
-
-初中/高中/大学/职业考试 - -- C-Eval -- AGIEval -- MMLU -- GAOKAO-Bench -- CMMLU -- ARC -- Xiezhi - -
- -
-医学考试 - -- CMB - -
-
- 理解 - - 长文本 - - 安全 - - 代码 -
-
-阅读理解 - -- C3 -- CMRC -- DRCD -- MultiRC -- RACE -- DROP -- OpenBookQA -- SQuAD2.0 - -
- -
-内容总结 - -- CSL -- LCSTS -- XSum -- SummScreen - -
- -
-内容分析 - -- EPRSTMT -- LAMBADA -- TNEWS - -
-
-
-长文本理解 - -- LEval -- LongBench -- GovReports -- NarrativeQA -- Qasper - -
-
-
-安全 - -- CivilComments -- CrowsPairs -- CValues -- JigsawMultilingual -- TruthfulQA - -
-
-健壮性 - -- AdvGLUE - -
-
-
-代码 - -- HumanEval -- HumanEvalX -- MBPP -- APPs -- DS1000 - -
-
+详情请参阅 [官方文档](https://opencompass.org.cn/doc) 的数据集统计章节。

🔝返回顶部

diff --git a/dataset-index.yml b/dataset-index.yml new file mode 100644 index 00000000..9fbde8bd --- /dev/null +++ b/dataset-index.yml @@ -0,0 +1,734 @@ + +- ifeval: + name: IFEval + category: Instruction Following + paper: https://arxiv.org/pdf/2311.07911 + configpath: opencompass/configs/datasets/IFEval +- nphard: + name: NPHardEval + category: Reasoning + paper: https://arxiv.org/pdf/2312.14890v2 + configpath: opencompass/configs/datasets/NPHardEval +- pmmeval: + name: PMMEval + category: Language + paper: https://arxiv.org/pdf/2411.09116v1 + configpath: opencompass/configs/datasets/PMMEval +- theoremqa: + name: TheoremQA + category: Reasoning + paper: https://arxiv.org/pdf/2305.12524 + configpath: opencompass/configs/datasets/TheoremQA +- agieval: + name: AGIEval + category: Examination + paper: https://arxiv.org/pdf/2304.06364 + configpath: opencompass/configs/datasets/agieval +- babilong: + name: BABILong + category: Long Context + paper: https://arxiv.org/pdf/2406.10149 + configpath: opencompass/configs/datasets/babilong +- bigcodebench: + name: BigCodeBench + category: Code + paper: https://arxiv.org/pdf/2406.15877 + configpath: opencompass/configs/datasets/bigcodebench +- calm: + name: CaLM + category: Reasoning + paper: https://arxiv.org/pdf/2405.00622 + configpath: opencompass/configs/datasets/calm +- infinitebench: + name: InfiniteBench (∞Bench) + category: Long Context + paper: https://aclanthology.org/2024.acl-long.814.pdf + configpath: opencompass/configs/datasets/infinitebench +- korbench: + name: KOR-Bench + category: Reasoning + paper: https://arxiv.org/pdf/2410.06526v1 + configpath: opencompass/configs/datasets/korbench +- lawbench: + name: LawBench + category: Knowledge / Law + paper: https://arxiv.org/pdf/2309.16289 + configpath: opencompass/configs/datasets/lawbench +- leval: + name: L-Eval + category: Long Context + paper: https://arxiv.org/pdf/2307.11088v1 + configpath: opencompass/configs/datasets/leval +- livecodebench: + name:
LiveCodeBench + category: Code + paper: https://arxiv.org/pdf/2403.07974 + configpath: opencompass/configs/datasets/livecodebench +- livemathbench: + name: LiveMathBench + category: Math + paper: https://arxiv.org/pdf/2412.13147 + configpath: opencompass/configs/datasets/livemathbench +- longbench: + name: LongBench + category: Long Context + paper: https://github.com/THUDM/LongBench + configpath: opencompass/configs/datasets/longbench +- lveval: + name: LV-Eval + category: Long Context + paper: https://arxiv.org/pdf/2402.05136 + configpath: opencompass/configs/datasets/lveval +- medbench: + name: MedBench + category: Knowledge / Medicine + paper: https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10778138 + configpath: opencompass/configs/datasets/MedBench +- musr: + name: MuSR + category: Reasoning + paper: https://arxiv.org/pdf/2310.16049 + configpath: opencompass/configs/datasets/musr +- needlebench: + name: NeedleBench + category: Long Context + paper: https://arxiv.org/pdf/2407.11963 + configpath: opencompass/configs/datasets/needlebench +- ruler: + name: RULER + category: Long Context + paper: https://arxiv.org/pdf/2404.06654 + configpath: opencompass/configs/datasets/ruler +- alignment: + name: AlignBench + category: Subjective / Alignment + paper: https://arxiv.org/pdf/2311.18743 + configpath: opencompass/configs/datasets/subjective/alignbench +- alpaca: + name: AlpacaEval + category: Subjective / Instruction Following + paper: https://github.com/tatsu-lab/alpaca_eval + configpath: opencompass/configs/datasets/subjective/alpaca_eval +- arenahard: + name: Arena-Hard + category: Subjective / Chatbot + paper: https://lmsys.org/blog/2024-04-19-arena-hard/ + configpath: opencompass/configs/datasets/subjective/arena_hard +- flames: + name: FLAMES + category: Subjective / Alignment + paper: https://arxiv.org/pdf/2311.06899 + configpath: opencompass/configs/datasets/subjective/flames +- fofo: + name: FOFO + category: Subjective / Format Following +
paper: https://arxiv.org/pdf/2402.18667 + configpath: opencompass/configs/datasets/subjective/fofo +- followbench: + name: FollowBench + category: Subjective / Instruction Following + paper: https://arxiv.org/pdf/2310.20410 + configpath: opencompass/configs/datasets/subjective/followbench +- hellobench: + name: HelloBench + category: Subjective / Long Context + paper: https://arxiv.org/pdf/2409.16191 + configpath: opencompass/configs/datasets/subjective/hellobench +- judgerbench: + name: JudgerBench + category: Subjective / Long Context + paper: https://arxiv.org/pdf/2410.16256 + configpath: opencompass/configs/datasets/subjective/judgerbench +- multiround: + name: MT-Bench-101 + category: Subjective / Multi-Round + paper: https://arxiv.org/pdf/2402.14762 + configpath: opencompass/configs/datasets/subjective/multiround +- wildbench: + name: WildBench + category: Subjective / Real Task + paper: https://arxiv.org/pdf/2406.04770 + configpath: opencompass/configs/datasets/subjective/wildbench +- teval: + name: T-Eval + category: Tool Utilization + paper: https://arxiv.org/pdf/2312.14033 + configpath: opencompass/configs/datasets/teval +- finalceiq: + name: FinanceIQ + category: Knowledge / Finance + paper: https://github.com/Duxiaoman-DI/XuanYuan/tree/main/FinanceIQ + configpath: opencompass/configs/datasets/FinanceIQ +- gaokaobench: + name: GAOKAOBench + category: Examination + paper: https://arxiv.org/pdf/2305.12474 + configpath: opencompass/configs/datasets/GaokaoBench +- lcbench: + name: LCBench + category: Code + paper: https://github.com/open-compass/CodeBench/ + configpath: opencompass/configs/datasets/LCBench +- MMLUArabic: + name: ArabicMMLU + category: Language + paper: https://arxiv.org/pdf/2402.12840 + configpath: opencompass/configs/datasets/MMLUArabic +- OpenFinData: + name: OpenFinData + category: Knowledge / Finance + paper: https://github.com/open-compass/OpenFinData + configpath: opencompass/configs/datasets/OpenFinData +- QuALITY: + name: QuALITY + 
category: Long Context + paper: https://arxiv.org/pdf/2112.08608 + configpath: opencompass/configs/datasets/QuALITY +- advglue: + name: Adversarial GLUE + category: Safety + paper: https://openreview.net/pdf?id=GF9cSKI3A_q + configpath: opencompass/configs/datasets/adv_glue +- afqmcd: + name: CLUE / AFQMC + category: Language + paper: https://arxiv.org/pdf/2004.05986 + configpath: opencompass/configs/datasets/CLUE_afqmc +- aime2024: + name: AIME2024 + category: Examination + paper: https://huggingface.co/datasets/Maxwell-Jia/AIME_2024 + configpath: opencompass/configs/datasets/aime2024 +- anli: + name: Adversarial NLI + category: Reasoning + paper: https://arxiv.org/pdf/1910.14599v2 + configpath: opencompass/configs/datasets/anli +- anthropics_evals: + name: Anthropics Evals + category: Safety + paper: https://arxiv.org/pdf/2212.09251 + configpath: opencompass/configs/datasets/anthropics_evals +- apps: + name: APPS + category: Code + paper: https://arxiv.org/pdf/2105.09938 + configpath: opencompass/configs/datasets/apps +- arc: + name: ARC + category: Reasoning + paper: https://arxiv.org/pdf/1803.05457 + configpath: [opencompass/configs/datasets/ARC_c, opencompass/configs/datasets/ARC_e] +- arc_prize_public_eval: + name: ARC Prize + category: ARC-AGI + paper: https://arcprize.org/guide#private + configpath: opencompass/configs/datasets/ARC_Prize_Public_Evaluation +- ax: + name: SuperGLUE / AX + category: Reasoning + paper: https://proceedings.neurips.cc/paper_files/paper/2019/file/4496bf24afe7fab6f046bf4923da8de6-Paper.pdf + configpath: [opencompass/configs/datasets/SuperGLUE_AX_b, opencompass/configs/datasets/SuperGLUE_AX_g] +- bbh: + name: BIG-Bench Hard + category: Reasoning + paper: https://arxiv.org/pdf/2210.09261 + configpath: opencompass/configs/datasets/bbh +- BoolQ: + name: SuperGLUE / BoolQ + category: Knowledge + paper: https://proceedings.neurips.cc/paper_files/paper/2019/file/4496bf24afe7fab6f046bf4923da8de6-Paper.pdf + configpath: 
opencompass/configs/datasets/SuperGLUE_BoolQ +- c3: + name: CLUE / C3 (C³) + category: Understanding + paper: https://arxiv.org/pdf/2004.05986 + configpath: opencompass/configs/datasets/CLUE_C3 +- cb: + name: SuperGLUE / CB + category: Reasoning + paper: https://proceedings.neurips.cc/paper_files/paper/2019/file/4496bf24afe7fab6f046bf4923da8de6-Paper.pdf + configpath: opencompass/configs/datasets/SuperGLUE_CB +- ceval: + name: C-EVAL + category: Examination + paper: https://arxiv.org/pdf/2305.08322v1 + configpath: opencompass/configs/datasets/ceval +- charm: + name: CHARM + category: Reasoning + paper: https://arxiv.org/pdf/2403.14112 + configpath: opencompass/configs/datasets/CHARM +- chembench: + name: ChemBench + category: Knowledge / Chemistry + paper: https://arxiv.org/pdf/2404.01475 + configpath: opencompass/configs/datasets/ChemBench +- chid: + name: FewCLUE / CHID + category: Language + paper: https://arxiv.org/pdf/2107.07498 + configpath: opencompass/configs/datasets/FewCLUE_chid +- chinese_simpleqa: + name: Chinese SimpleQA + category: Knowledge + paper: https://arxiv.org/pdf/2411.07140 + configpath: opencompass/configs/datasets/chinese_simpleqa +- cibench: + name: CIBench + category: Code + paper: https://www.arxiv.org/pdf/2407.10499 + configpath: opencompass/configs/datasets/CIBench +- civilcomments: + name: CivilComments + category: Safety + paper: https://arxiv.org/pdf/1903.04561 + configpath: opencompass/configs/datasets/civilcomments +- clozeTest_maxmin: + name: Cloze Test-max/min + category: Code + paper: https://arxiv.org/pdf/2102.04664 + configpath: opencompass/configs/datasets/clozeTest_maxmin +- cluewsc: + name: FewCLUE / CLUEWSC + category: Language / WSC + paper: https://arxiv.org/pdf/2107.07498 + configpath: opencompass/configs/datasets/FewCLUE_cluewsc +- cmb: + name: CMB + category: Knowledge / Medicine + paper: https://arxiv.org/pdf/2308.08833 + configpath: opencompass/configs/datasets/cmb +- cmmlu: + name: CMMLU + category: Understanding 
+ paper: https://arxiv.org/pdf/2306.09212 + configpath: opencompass/configs/datasets/cmmlu +- cmnli: + name: CLUE / CMNLI + category: Reasoning + paper: https://arxiv.org/pdf/2004.05986 + configpath: opencompass/configs/datasets/CLUE_cmnli +- cmo_fib: + name: cmo_fib + category: Examination + paper: "" + configpath: opencompass/configs/datasets/cmo_fib +- cmrc: + name: CLUE / CMRC + category: Understanding + paper: https://arxiv.org/pdf/2004.05986 + configpath: opencompass/configs/datasets/CLUE_CMRC +- commonsenseqa: + name: CommonSenseQA + category: Knowledge + paper: https://arxiv.org/pdf/1811.00937v2 + configpath: opencompass/configs/datasets/commonsenseqa +- commonsenseqa_cn: + name: CommonSenseQA-CN + category: Knowledge + paper: "" + configpath: opencompass/configs/datasets/commonsenseqa_cn +- copa: + name: SuperGLUE / COPA + category: Reasoning + paper: https://proceedings.neurips.cc/paper_files/paper/2019/file/4496bf24afe7fab6f046bf4923da8de6-Paper.pdf + configpath: opencompass/configs/datasets/SuperGLUE_COPA +- crowspairs: + name: CrowsPairs + category: Safety + paper: https://arxiv.org/pdf/2010.00133 + configpath: opencompass/configs/datasets/crowspairs +- crowspairs_cn: + name: CrowsPairs-CN + category: Safety + paper: "" + configpath: opencompass/configs/datasets/crowspairs_cn +- cvalues: + name: CVALUES + category: Safety + paper: http://xdp-expriment.oss-cn-zhangjiakou.aliyuncs.com/shanqi.xgh/release_github/CValues.pdf + configpath: opencompass/configs/datasets/cvalues +- drcd: + name: CLUE / DRCD + category: Understanding + paper: https://arxiv.org/pdf/2004.05986 + configpath: opencompass/configs/datasets/CLUE_DRCD +- drop: + name: DROP (DROP Simple Eval) + category: Understanding + paper: https://arxiv.org/pdf/1903.00161 + configpath: opencompass/configs/datasets/drop +- ds1000: + name: DS-1000 + category: Code + paper: https://arxiv.org/pdf/2211.11501 + configpath: opencompass/configs/datasets/ds1000 +- eprstmt: + name: FewCLUE / EPRSTMT + 
category: Understanding + paper: https://arxiv.org/pdf/2107.07498 + configpath: opencompass/configs/datasets/FewCLUE_eprstmt +- flores: + name: Flores + category: Language + paper: https://aclanthology.org/D19-1632.pdf + configpath: opencompass/configs/datasets/flores +- game24: + name: Game24 + category: Math + paper: https://huggingface.co/datasets/nlile/24-game + configpath: opencompass/configs/datasets/game24 +- govrepcrs: + name: Government Report Dataset + category: Long Context + paper: https://aclanthology.org/2021.naacl-main.112.pdf + configpath: opencompass/configs/datasets/govrepcrs +- gpqa: + name: GPQA + category: Knowledge + paper: https://arxiv.org/pdf/2311.12022v1 + configpath: opencompass/configs/datasets/gpqa +- gsm8k: + name: GSM8K + category: Math + paper: https://arxiv.org/pdf/2110.14168v2 + configpath: opencompass/configs/datasets/gsm8k +- gsm_hard: + name: GSM-Hard + category: Math + paper: https://proceedings.mlr.press/v202/gao23f/gao23f.pdf + configpath: opencompass/configs/datasets/gsm_hard +- hellaswag: + name: HellaSwag + category: Reasoning + paper: https://arxiv.org/pdf/1905.07830 + configpath: opencompass/configs/datasets/hellaswag +- humaneval: + name: HumanEval + category: Code + paper: https://arxiv.org/pdf/2107.03374v2 + configpath: opencompass/configs/datasets/humaneval +- humaneval_cn: + name: HumanEval-CN + category: Code + paper: "" + configpath: opencompass/configs/datasets/humaneval_cn +- humaneval_multi: + name: Multi-HumanEval + category: Code + paper: https://arxiv.org/pdf/2210.14868 + configpath: opencompass/configs/datasets/humaneval_multi +- humanevalx: + name: HumanEval-X + category: Code + paper: https://dl.acm.org/doi/pdf/10.1145/3580305.3599790 + configpath: opencompass/configs/datasets/humanevalx +- hungarian_math: + name: Hungarian_Math + category: Math + paper: https://huggingface.co/datasets/keirp/hungarian_national_hs_finals_exam + configpath: opencompass/configs/datasets/hungarian_exam +- iwslt2017: + name: 
IWSLT2017 + category: Language + paper: https://cris.fbk.eu/bitstream/11582/312796/1/iwslt17-overview.pdf + configpath: opencompass/configs/datasets/iwslt2017 +- jigsawmultilingual: + name: JigsawMultilingual + category: Safety + paper: https://www.kaggle.com/competitions/jigsaw-multilingual-toxic-comment-classification/data + configpath: opencompass/configs/datasets/jigsawmultilingual +- lambada: + name: LAMBADA + category: Understanding + paper: https://arxiv.org/pdf/1606.06031 + configpath: opencompass/configs/datasets/lambada +- lcsts: + name: LCSTS + category: Understanding + paper: https://aclanthology.org/D15-1229.pdf + configpath: opencompass/configs/datasets/lcsts +- livestembench: + name: LiveStemBench + category: "" + paper: "" + configpath: opencompass/configs/datasets/livestembench +- llm_compression: + name: LLM Compression + category: Bits Per Character (BPC) + paper: https://arxiv.org/pdf/2404.09937 + configpath: opencompass/configs/datasets/llm_compression +- math: + name: MATH + category: Math + paper: https://arxiv.org/pdf/2103.03874 + configpath: opencompass/configs/datasets/math +- math401: + name: MATH 401 + category: Math + paper: https://arxiv.org/pdf/2304.02015 + configpath: opencompass/configs/datasets/math401 +- mathbench: + name: MathBench + category: Math + paper: https://arxiv.org/pdf/2405.12209 + configpath: opencompass/configs/datasets/mathbench +- mbpp: + name: MBPP + category: Code + paper: https://arxiv.org/pdf/2108.07732 + configpath: opencompass/configs/datasets/mbpp +- mbpp_cn: + name: MBPP-CN + category: Code + paper: "" + configpath: opencompass/configs/datasets/mbpp_cn +- mbpp_plus: + name: MBPP-PLUS + category: Code + paper: "" + configpath: opencompass/configs/datasets/mbpp_plus +- mgsm: + name: MGSM + category: Language / Math + paper: https://arxiv.org/pdf/2210.03057 + configpath: opencompass/configs/datasets/mgsm +- mmlu: + name: MMLU + category: Understanding + paper: https://arxiv.org/pdf/2009.03300 + configpath: 
opencompass/configs/datasets/mmlu +- mmlu_cf: + name: MMLU-CF + category: Understanding + paper: https://arxiv.org/pdf/2412.15194 + configpath: opencompass/configs/datasets/mmlu_cf +- mmlu_pro: + name: MMLU-Pro + category: Understanding + paper: https://arxiv.org/pdf/2406.01574 + configpath: opencompass/configs/datasets/mmlu_pro +- mmmlu: + name: MMMLU + category: Language / Understanding + paper: https://huggingface.co/datasets/openai/MMMLU + configpath: opencompass/configs/datasets/mmmlu +- multirc: + name: SuperGLUE / MultiRC + category: Understanding + paper: https://proceedings.neurips.cc/paper_files/paper/2019/file/4496bf24afe7fab6f046bf4923da8de6-Paper.pdf + configpath: opencompass/configs/datasets/SuperGLUE_MultiRC +- narrativeqa: + name: NarrativeQA + category: Understanding + paper: https://github.com/google-deepmind/narrativeqa + configpath: opencompass/configs/datasets/narrativeqa +- natural_question: + name: NaturalQuestions + category: Knowledge + paper: https://github.com/google-research-datasets/natural-questions + configpath: opencompass/configs/datasets/nq +- natural_question_cn: + name: NaturalQuestions-CN + category: Knowledge + paper: "" + configpath: opencompass/configs/datasets/nq_cn +- obqa: + name: OpenBookQA + category: Knowledge + paper: https://arxiv.org/pdf/1809.02789v1 + configpath: opencompass/configs/datasets/obqa +- piqa: + name: PIQA + category: Knowledge / Physics + paper: https://arxiv.org/pdf/1911.11641v1 + configpath: opencompass/configs/datasets/piqa +- py150: + name: py150 + category: Code + paper: https://github.com/microsoft/CodeXGLUE/tree/main/Code-Code/CodeCompletion-line + configpath: opencompass/configs/datasets/py150 +- qasper: + name: Qasper + category: Long Context + paper: https://arxiv.org/pdf/2105.03011 + configpath: opencompass/configs/datasets/qasper +- qaspercut: + name: Qasper-Cut + category: Long Context + paper: "" + configpath: opencompass/configs/datasets/qaspercut +- race: + name: RACE + category:
Examination + paper: https://arxiv.org/pdf/1704.04683 + configpath: opencompass/configs/datasets/race +- realtoxicprompts: + name: RealToxicPrompts + category: Safety + paper: https://arxiv.org/pdf/2009.11462 + configpath: opencompass/configs/datasets/realtoxicprompts +- record: + name: SuperGLUE / ReCoRD + category: Understanding + paper: https://proceedings.neurips.cc/paper_files/paper/2019/file/4496bf24afe7fab6f046bf4923da8de6-Paper.pdf + configpath: opencompass/configs/datasets/SuperGLUE_ReCoRD +- rte: + name: SuperGLUE / RTE + category: Reasoning + paper: https://proceedings.neurips.cc/paper_files/paper/2019/file/4496bf24afe7fab6f046bf4923da8de6-Paper.pdf + configpath: opencompass/configs/datasets/SuperGLUE_RTE +- ocnli: + name: CLUE / OCNLI + category: Reasoning + paper: https://arxiv.org/pdf/2004.05986 + configpath: opencompass/configs/datasets/CLUE_ocnli +- rolebench: + name: RoleBench + category: Role Play + paper: https://arxiv.org/pdf/2310.00746 + configpath: opencompass/configs/datasets/rolebench +- s3eval: + name: S3Eval + category: Long Context + paper: https://aclanthology.org/2024.naacl-long.69.pdf + configpath: opencompass/configs/datasets/s3eval +- scibench: + name: SciBench + category: Reasoning + paper: https://sxkdz.github.io/files/publications/ICML/SciBench/SciBench.pdf + configpath: opencompass/configs/datasets/scibench +- scicode: + name: SciCode + category: Code + paper: https://arxiv.org/pdf/2407.13168 + configpath: opencompass/configs/datasets/scicode +- simpleqa: + name: SimpleQA + category: Knowledge + paper: https://arxiv.org/pdf/2411.04368 + configpath: opencompass/configs/datasets/SimpleQA +- siqa: + name: SocialIQA + category: Reasoning + paper: https://arxiv.org/pdf/1904.09728 + configpath: opencompass/configs/datasets/siqa +- squad20: + name: SQuAD2.0 + category: Understanding + paper: https://arxiv.org/pdf/1806.03822 + configpath: opencompass/configs/datasets/squad20 +- storycloze: + name: StoryCloze + category: Reasoning + 
paper: https://aclanthology.org/2022.emnlp-main.616.pdf + configpath: opencompass/configs/datasets/storycloze +- strategyqa: + name: StrategyQA + category: Reasoning + paper: https://arxiv.org/pdf/2101.02235 + configpath: opencompass/configs/datasets/strategyqa +- summedits: + name: SummEdits + category: Language + paper: https://aclanthology.org/2023.emnlp-main.600.pdf + configpath: opencompass/configs/datasets/summedits +- summscreen: + name: SummScreen + category: Understanding + paper: https://arxiv.org/pdf/2104.07091v1 + configpath: opencompass/configs/datasets/summscreen +- svamp: + name: SVAMP + category: Math + paper: https://aclanthology.org/2021.naacl-main.168.pdf + configpath: opencompass/configs/datasets/SVAMP +- tabmwp: + name: TabMWP + category: Math / Table + paper: https://arxiv.org/pdf/2209.14610 + configpath: opencompass/configs/datasets/TabMWP +- taco: + name: TACO + category: Code + paper: https://arxiv.org/pdf/2312.14852 + configpath: opencompass/configs/datasets/taco +- tnews: + name: FewCLUE / TNEWS + category: Understanding + paper: https://arxiv.org/pdf/2107.07498 + configpath: opencompass/configs/datasets/FewCLUE_tnews +- bustm: + name: FewCLUE / BUSTM + category: Reasoning + paper: https://arxiv.org/pdf/2107.07498 + configpath: opencompass/configs/datasets/FewCLUE_bustm +- csl: + name: FewCLUE / CSL + category: Understanding + paper: https://arxiv.org/pdf/2107.07498 + configpath: opencompass/configs/datasets/FewCLUE_csl +- ocnli_fc: + name: FewCLUE / OCNLI-FC + category: Reasoning + paper: https://arxiv.org/pdf/2107.07498 + configpath: opencompass/configs/datasets/FewCLUE_ocnli_fc +- triviaqa: + name: TriviaQA + category: Knowledge + paper: https://arxiv.org/pdf/1705.03551v2 + configpath: opencompass/configs/datasets/triviaqa +- triviaqarc: + name: TriviaQA-RC + category: Knowledge / Understanding + paper: "" + configpath: opencompass/configs/datasets/triviaqarc +- truthfulqa: + name: TruthfulQA + category: Safety + paper: 
https://arxiv.org/pdf/2109.07958v2 + configpath: opencompass/configs/datasets/truthfulqa +- tydiqa: + name: TyDi-QA + category: Language + paper: https://storage.googleapis.com/tydiqa/tydiqa.pdf + configpath: opencompass/configs/datasets/tydiqa +- wic: + name: SuperGLUE / WiC + category: Language + paper: https://proceedings.neurips.cc/paper_files/paper/2019/file/4496bf24afe7fab6f046bf4923da8de6-Paper.pdf + configpath: opencompass/configs/datasets/SuperGLUE_WiC +- wsc: + name: SuperGLUE / WSC + category: Language / WSC + paper: https://proceedings.neurips.cc/paper_files/paper/2019/file/4496bf24afe7fab6f046bf4923da8de6-Paper.pdf + configpath: opencompass/configs/datasets/SuperGLUE_WSC +- winogrande: + name: WinoGrande + category: Language / WSC + paper: https://arxiv.org/pdf/1907.10641v2 + configpath: opencompass/configs/datasets/winogrande +- xcopa: + name: XCOPA + category: Language + paper: https://arxiv.org/pdf/2005.00333 + configpath: opencompass/configs/datasets/XCOPA +- xiezhi: + name: Xiezhi + category: Knowledge + paper: https://arxiv.org/pdf/2306.05783 + configpath: opencompass/configs/datasets/xiezhi +- xlsum: + name: XLSum + category: Understanding + paper: https://arxiv.org/pdf/2106.13822v1 + configpath: opencompass/configs/datasets/XLSum +- xsum: + name: Xsum + category: Understanding + paper: https://arxiv.org/pdf/1808.08745 + configpath: opencompass/configs/datasets/Xsum + + + diff --git a/docs/en/_static/js/custom.js b/docs/en/_static/js/custom.js index 84da69d4..9b9f2480 100644 --- a/docs/en/_static/js/custom.js +++ b/docs/en/_static/js/custom.js @@ -1,10 +1,20 @@ -var collapsedSections = []; +var collapsedSections = ['Dataset Statistics']; $(document).ready(function () { - $('.model-summary').DataTable({ + $('.dataset').DataTable({ "stateSave": false, "lengthChange": false, "pageLength": 20, - "order": [] + "order": [], + "language": { + "info": "Show _START_ to _END_ Items(Totally _TOTAL_ )", + "infoFiltered": "(Filtered from _MAX_ Items)", + 
"search": "Search:", + "zeroRecords": "Item Not Found", + "paginate": { + "next": "Next", + "previous": "Previous" + }, + } }); }); diff --git a/docs/en/advanced_guides/new_dataset.md b/docs/en/advanced_guides/new_dataset.md index 72f33318..e07e6868 100644 --- a/docs/en/advanced_guides/new_dataset.md +++ b/docs/en/advanced_guides/new_dataset.md @@ -90,4 +90,16 @@ Although OpenCompass has already included most commonly used datasets, users nee return dataset ``` +3. After completing the dataset script and config file, you need to register the information of your new dataset in the file `dataset-index.yml` at the main directory, so that it can be added to the dataset statistics list on the OpenCompass website. + + - The keys that need to be filled in include `name`: the name of your dataset, `category`: the category of your dataset, `paper`: the URL of the paper or project, and `configpath`: the path to the dataset config file. Here's an example: + + ``` + - mydataset: + name: MyDataset + category: Understanding + paper: https://arxiv.org/pdf/xxxxxxx + configpath: opencompass/configs/datasets/MyDataset + ``` + Detailed dataset configuration files and other required configuration files can be referred to in the [Configuration Files](../user_guides/config.md) tutorial. For guides on launching tasks, please refer to the [Quick Start](../get_started/quick_start.md) tutorial. 
diff --git a/docs/en/conf.py b/docs/en/conf.py index 64a3a83a..9101ba3f 100644 --- a/docs/en/conf.py +++ b/docs/en/conf.py @@ -220,3 +220,11 @@ autodoc_typehints = 'none' # The not found page notfound_template = '404.html' + + +def builder_inited_handler(app): + subprocess.run(['./statis.py']) + + +def setup(app): + app.connect('builder-inited', builder_inited_handler) \ No newline at end of file diff --git a/docs/en/index.rst b/docs/en/index.rst index fdad9c9e..7181c459 100644 --- a/docs/en/index.rst +++ b/docs/en/index.rst @@ -80,6 +80,13 @@ We always welcome *PRs* and *Issues* for the betterment of OpenCompass. tools.md +.. _Dataset List: +.. toctree:: + :maxdepth: 1 + :caption: Dataset List + + dataset_statistics.md + .. _Notes: .. toctree:: :maxdepth: 1 diff --git a/docs/en/statis.py b/docs/en/statis.py new file mode 100755 index 00000000..a110c631 --- /dev/null +++ b/docs/en/statis.py @@ -0,0 +1,76 @@ +#! /usr/bin/env python + +from pathlib import Path + +import yaml +from tabulate import tabulate + +OC_ROOT = Path(__file__).absolute().parents[2] +GITHUB_PREFIX = 'https://github.com/open-compass/opencompass/tree/main/' +DATASETZOO_TEMPLATE = """\ +# Dataset Statistics + +On this page, we have listed all the datasets supported by OpenCompass. + +You can use sorting and search functions to find the dataset you need. 
+ +""" + +with open('dataset_statistics.md', 'w') as f: + f.write(DATASETZOO_TEMPLATE) + +load_path = str(OC_ROOT / 'dataset-index.yml') + +with open(load_path, 'r') as f2: + data_list = yaml.load(f2, Loader=yaml.FullLoader) + +HEADER = ['name', 'category', 'paper', 'configpath'] + + +def table_format(data_list): + table_format_list = [] + for i in data_list: + table_format_list_sub = [] + for j in i: + for index in HEADER: + if index == 'paper': + table_format_list_sub.append('[link](' + i[j][index] + ')') + elif index == 'configpath': + if isinstance(i[j][index], list): + sub_list_text = '' + for k in i[j][index]: + sub_list_text += ('[link](' + GITHUB_PREFIX + k + + ') / ') + table_format_list_sub.append(sub_list_text[:-2]) + else: + table_format_list_sub.append('[link](' + + GITHUB_PREFIX + + i[j][index] + ')') + else: + table_format_list_sub.append(i[j][index]) + table_format_list.append(table_format_list_sub) + return table_format_list + + +data_format_list = table_format(data_list) + + +def generate_table(data_list, title=None): + + with open('dataset_statistics.md', 'a') as f: + if title is not None: + f.write(f'\n{title}') + f.write("""\n```{table}\n:class: dataset\n""") + header = ['Name', 'Category', 'Paper or Repository', 'Config File'] + table_cfg = dict(tablefmt='pipe', + floatfmt='.2f', + numalign='right', + stralign='center') + f.write(tabulate(data_list, header, **table_cfg)) + f.write('\n```\n') + + +generate_table( + data_list=data_format_list, + title='## Supported Dataset List', +) diff --git a/docs/zh_cn/_static/js/custom.js b/docs/zh_cn/_static/js/custom.js index 84da69d4..ecbff47e 100644 --- a/docs/zh_cn/_static/js/custom.js +++ b/docs/zh_cn/_static/js/custom.js @@ -1,10 +1,20 @@ -var collapsedSections = []; +var collapsedSections = ['数据集统计']; $(document).ready(function () { - $('.model-summary').DataTable({ + $('.dataset').DataTable({ "stateSave": false, "lengthChange": false, "pageLength": 20, - "order": [] + "order": [], + "language": { + 
"info": "显示 _START_ 至 _END_ 条目(总计 _TOTAL_ )", + "infoFiltered": "(筛选自 _MAX_ 条目)", + "search": "搜索:", + "zeroRecords": "没有找到任何条目", + "paginate": { + "next": "下一页", + "previous": "上一页" + }, + } }); }); diff --git a/docs/zh_cn/advanced_guides/new_dataset.md b/docs/zh_cn/advanced_guides/new_dataset.md index 16e85f37..16921885 100644 --- a/docs/zh_cn/advanced_guides/new_dataset.md +++ b/docs/zh_cn/advanced_guides/new_dataset.md @@ -91,4 +91,16 @@ return dataset ``` +3. 在完成数据集脚本和配置文件的构建后,需要在OpenCompass主目录下的`dataset-index.yml`配置文件中登记新数据集的相关信息,以使其加入OpenCompass官网Doc的数据集统计列表中。 + + - 需要填写的字段包括数据集名称`name`、数据集类型`category`、原文或项目地址`paper`、以及数据集配置文件的路径`configpath`。具体示例如下: + + ``` + - mydataset: + name: MyDataset + category: Understanding + paper: https://arxiv.org/pdf/xxxxxxx + configpath: opencompass/configs/datasets/MyDataset + ``` + 详细的数据集配置文件以及其他需要的配置文件可以参考[配置文件](../user_guides/config.md)教程,启动任务相关的教程可以参考[快速开始](../get_started/quick_start.md)教程。 diff --git a/docs/zh_cn/conf.py b/docs/zh_cn/conf.py index 640ea1d8..8910ead0 100644 --- a/docs/zh_cn/conf.py +++ b/docs/zh_cn/conf.py @@ -224,6 +224,7 @@ notfound_template = '404.html' def builder_inited_handler(app): subprocess.run(['./cp_origin_docs.sh']) + subprocess.run(['./statis.py']) def setup(app): diff --git a/docs/zh_cn/index.rst b/docs/zh_cn/index.rst index 37a3bc0c..827c7d91 100644 --- a/docs/zh_cn/index.rst +++ b/docs/zh_cn/index.rst @@ -81,6 +81,13 @@ OpenCompass 上手路线 tools.md +.. _数据集列表: +.. toctree:: + :maxdepth: 1 + :caption: 数据集列表 + + dataset_statistics.md + .. _其他说明: .. toctree:: :maxdepth: 1 diff --git a/docs/zh_cn/statis.py b/docs/zh_cn/statis.py new file mode 100755 index 00000000..eb5dc7fe --- /dev/null +++ b/docs/zh_cn/statis.py @@ -0,0 +1,75 @@ +#! 
#! /usr/bin/env python
"""Generate ``dataset_statistics.md``, the dataset list page of the docs.

The script loads ``dataset-index.yml`` from the repository root (``OC_ROOT``)
and writes a Markdown page made of a short introduction followed by a single
table with one row per registered dataset.  The output path is relative to the
current working directory, so the script has to be started from the directory
in which the page should be created.
"""

from pathlib import Path

import yaml
from tabulate import tabulate

# This file lives two directory levels below the repository root.
OC_ROOT = Path(__file__).absolute().parents[2]
GITHUB_PREFIX = 'https://github.com/open-compass/opencompass/tree/main/'
DATASETZOO_TEMPLATE = """\
# 数据集统计

在本页面中,我们列举了OpenCompass所支持的所有数据集。

你可以使用排序和搜索功能找到需要的数据集。

"""

# The page contains non-ASCII text, so the encoding is given explicitly rather
# than inherited from the locale (which is not UTF-8 on every platform).
with open('dataset_statistics.md', 'w', encoding='utf-8') as f:
    f.write(DATASETZOO_TEMPLATE)

load_path = str(OC_ROOT / 'dataset-index.yml')

# NOTE(review): the index is a repository-owned file holding plain mappings,
# lists and strings; ``yaml.safe_load`` would presumably be sufficient and is
# the safer choice should this ever read externally supplied YAML.
with open(load_path, 'r', encoding='utf-8') as f2:
    data_list = yaml.load(f2, Loader=yaml.FullLoader)

# Keys read from every dataset entry, in table column order.
HEADER = ['name', 'category', 'paper', 'configpath']


def _format_cell(field, value):
    """Return the Markdown text of one table cell.

    Args:
        field: One of the keys listed in ``HEADER``.
        value: The raw value stored under ``field`` in the index.  For
            ``configpath`` this is either a single path or a list of paths.

    Returns:
        A Markdown link for ``paper``, one or more ``' / '``-separated
        GitHub links for ``configpath``, and ``value`` unchanged otherwise.
    """
    if field == 'paper':
        return f'[链接]({value})'
    if field == 'configpath':
        paths = value if isinstance(value, list) else [value]
        # Joining avoids the trailing separator that string accumulation
        # followed by slicing used to leave behind.
        return ' / '.join(f'[链接]({GITHUB_PREFIX}{path})' for path in paths)
    return value


def table_format(data_list):
    """Convert the loaded dataset index into table rows.

    Args:
        data_list: The parsed index: a list of mappings, each mapping a
            dataset key to a dict that provides every key in ``HEADER``.
            ``None`` (an empty YAML file) is treated as an empty index.

    Returns:
        A list of rows; every row holds one cell per ``HEADER`` entry.

    Raises:
        KeyError: If a dataset entry lacks one of the ``HEADER`` keys.
    """
    rows = []
    for entry in data_list or []:
        # One row per dataset mapping, so an item that happens to hold several
        # dataset keys cannot produce a row wider than the table header.
        for info in entry.values():
            rows.append([_format_cell(field, info[field])
                         for field in HEADER])
    return rows


data_format_list = table_format(data_list)


def generate_table(data_list, title=None):
    """Append a table of ``data_list`` to ``dataset_statistics.md``.

    Args:
        data_list: Table rows as produced by ``table_format``.
        title: Optional Markdown heading written before the table.
    """
    header = ['数据集名称', '数据集类型', '原文或资源地址', '配置文件链接']
    table_cfg = dict(tablefmt='pipe',
                     floatfmt='.2f',
                     numalign='right',
                     stralign='center')

    with open('dataset_statistics.md', 'a', encoding='utf-8') as f:
        if title is not None:
            f.write(f'\n{title}')
        # The ``dataset`` class is attached to the rendered table; presumably
        # it is the hook the documentation's JavaScript uses to find it.
        f.write("""\n```{table}\n:class: dataset\n""")
        f.write(tabulate(data_list, header, **table_cfg))
        f.write('\n```\n')


generate_table(
    data_list=data_format_list,
    title='## 支持数据集列表',
)