diff --git a/configs/datasets/CLUE_afqmc/CLUE_afqmc_gen.py b/configs/datasets/CLUE_afqmc/CLUE_afqmc_gen.py new file mode 100644 index 00000000..1f600c2e --- /dev/null +++ b/configs/datasets/CLUE_afqmc/CLUE_afqmc_gen.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .CLUE_afqmc_gen_db509b import afqmc_datasets # noqa: F401, F403 diff --git a/configs/datasets/FewCLUE_bustm/FewCLUE_bustm_gen.py b/configs/datasets/FewCLUE_bustm/FewCLUE_bustm_gen.py new file mode 100644 index 00000000..15c24fbc --- /dev/null +++ b/configs/datasets/FewCLUE_bustm/FewCLUE_bustm_gen.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .FewCLUE_bustm_gen_305431 import bustm_datasets # noqa: F401, F403 diff --git a/configs/datasets/FewCLUE_eprstmt/FewCLUE_eprstmt_gen.py b/configs/datasets/FewCLUE_eprstmt/FewCLUE_eprstmt_gen.py new file mode 100644 index 00000000..d1dd4972 --- /dev/null +++ b/configs/datasets/FewCLUE_eprstmt/FewCLUE_eprstmt_gen.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .FewCLUE_eprstmt_gen_d6d06d import eprstmt_datasets # noqa: F401, F403 diff --git a/configs/datasets/FewCLUE_ocnli_fc/FewCLUE_ocnli_fc_ppl.py b/configs/datasets/FewCLUE_ocnli_fc/FewCLUE_ocnli_fc_ppl.py new file mode 100644 index 00000000..15d5aa4d --- /dev/null +++ b/configs/datasets/FewCLUE_ocnli_fc/FewCLUE_ocnli_fc_ppl.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .FewCLUE_ocnli_fc_ppl_b828fc import ocnli_fc_datasets # noqa: F401, F403 diff --git a/configs/datasets/SuperGLUE_AX_b/SuperGLUE_AX_b_gen.py b/configs/datasets/SuperGLUE_AX_b/SuperGLUE_AX_b_gen.py new file mode 100644 index 00000000..8039d2ca --- /dev/null +++ b/configs/datasets/SuperGLUE_AX_b/SuperGLUE_AX_b_gen.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .SuperGLUE_AX_b_gen_477186 import AX_b_datasets # noqa: F401, F403 diff --git 
a/configs/datasets/agieval/agieval_mixed.py b/configs/datasets/agieval/agieval_mixed.py new file mode 100644 index 00000000..a3e9ef58 --- /dev/null +++ b/configs/datasets/agieval/agieval_mixed.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .agieval_mixed_2f14ad import agieval_datasets # noqa: F401, F403 diff --git a/configs/datasets/lcsts/lcsts_gen_65108e.py b/configs/datasets/lcsts/lcsts_gen_65108e.py new file mode 100644 index 00000000..428ef1b6 --- /dev/null +++ b/configs/datasets/lcsts/lcsts_gen_65108e.py @@ -0,0 +1,28 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import RougeEvaluator +from opencompass.datasets import LCSTSDataset + +lcsts_reader_cfg = dict(input_columns=['content'], output_column='abst') + +lcsts_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, template='阅读文章:{content}\n根据上文,给出简短的单个摘要:'), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer)) + +lcsts_eval_cfg = dict( + evaluator=dict(type=RougeEvaluator), + pred_postprocessor=dict(type='lcsts'), +) + +lcsts_datasets = [ + dict( + type=LCSTSDataset, + abbr='lcsts', + path='./data/LCSTS', + reader_cfg=lcsts_reader_cfg, + infer_cfg=lcsts_infer_cfg, + eval_cfg=lcsts_eval_cfg) +] diff --git a/configs/datasets/mmlu/mmlu_gen.py b/configs/datasets/mmlu/mmlu_gen.py new file mode 100644 index 00000000..d0612974 --- /dev/null +++ b/configs/datasets/mmlu/mmlu_gen.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .mmlu_gen_a568f1 import mmlu_datasets # noqa: F401, F403 diff --git a/configs/datasets/winogrande/winogrande_ppl.py b/configs/datasets/winogrande/winogrande_ppl.py new file mode 100644 index 00000000..75cd6660 --- /dev/null +++ b/configs/datasets/winogrande/winogrande_ppl.py @@ -0,0 +1,4 @@ +from 
mmengine.config import read_base + +with read_base(): + from .winogrande_ppl_00f8ad import winogrande_datasets # noqa: F401, F403 diff --git a/configs/summarizers/groups/mmlu.py b/configs/summarizers/groups/mmlu.py new file mode 100644 index 00000000..e581dba4 --- /dev/null +++ b/configs/summarizers/groups/mmlu.py @@ -0,0 +1,23 @@ +mmlu_summary_groups = [] + +_mmlu_humanities = ['formal_logic', 'high_school_european_history', 'high_school_us_history', 'high_school_world_history', 'international_law', 'jurisprudence', 'logical_fallacies', 'moral_disputes', 'moral_scenarios', 'philosophy', 'prehistory', 'professional_law', 'world_religions'] +_mmlu_humanities = ['lukaemon_mmlu_' + s for s in _mmlu_humanities] +mmlu_summary_groups.append({'name': 'mmlu-humanities', 'subsets': _mmlu_humanities}) + +_mmlu_stem = ['abstract_algebra', 'anatomy', 'astronomy', 'college_biology', 'college_chemistry', 'college_computer_science', 'college_mathematics', 'college_physics', 'computer_security', 'conceptual_physics', 'electrical_engineering', 'elementary_mathematics', 'high_school_biology', 'high_school_chemistry', 'high_school_computer_science', 'high_school_mathematics', 'high_school_physics', 'high_school_statistics', 'machine_learning'] +_mmlu_stem = ['lukaemon_mmlu_' + s for s in _mmlu_stem] +mmlu_summary_groups.append({'name': 'mmlu-stem', 'subsets': _mmlu_stem}) + +_mmlu_social_science = ['econometrics', 'high_school_geography', 'high_school_government_and_politics', 'high_school_macroeconomics', 'high_school_microeconomics', 'high_school_psychology', 'human_sexuality', 'professional_psychology', 'public_relations', 'security_studies', 'sociology', 'us_foreign_policy'] +_mmlu_social_science = ['lukaemon_mmlu_' + s for s in _mmlu_social_science] +mmlu_summary_groups.append({'name': 'mmlu-social-science', 'subsets': _mmlu_social_science}) + +_mmlu_other = ['business_ethics', 'clinical_knowledge', 'college_medicine', 'global_facts', 'human_aging', 'management', 'marketing', 
'medical_genetics', 'miscellaneous', 'nutrition', 'professional_accounting', 'professional_medicine', 'virology'] +_mmlu_other = ['lukaemon_mmlu_' + s for s in _mmlu_other] +mmlu_summary_groups.append({'name': 'mmlu-other', 'subsets': _mmlu_other}) + +_mmlu_all = _mmlu_humanities + _mmlu_stem + _mmlu_social_science + _mmlu_other +_mmlu_weights = {'college_biology': 144,'college_chemistry': 100,'college_computer_science': 100,'college_mathematics': 100,'college_physics': 102,'electrical_engineering': 145,'astronomy': 152,'anatomy': 135,'abstract_algebra': 100,'machine_learning': 112,'clinical_knowledge': 265,'global_facts': 100,'management': 103,'nutrition': 306,'marketing': 234,'professional_accounting': 282,'high_school_geography': 198,'international_law': 121,'moral_scenarios': 895,'computer_security': 100,'high_school_microeconomics': 238,'professional_law': 1534,'medical_genetics': 100,'professional_psychology': 612,'jurisprudence': 108,'world_religions': 171,'philosophy': 311,'virology': 166,'high_school_chemistry': 203,'public_relations': 110,'high_school_macroeconomics': 390,'human_sexuality': 131,'elementary_mathematics': 378,'high_school_physics': 151,'high_school_computer_science': 100,'high_school_european_history': 165,'business_ethics': 100,'moral_disputes': 346,'high_school_statistics': 216,'miscellaneous': 783,'formal_logic': 126,'high_school_government_and_politics': 193,'prehistory': 324,'security_studies': 245,'high_school_biology': 310,'logical_fallacies': 163,'high_school_world_history': 237,'professional_medicine': 272,'high_school_mathematics': 270,'college_medicine': 173,'high_school_us_history': 204,'sociology': 201,'econometrics': 114,'high_school_psychology': 545,'human_aging': 223,'us_foreign_policy': 100,'conceptual_physics': 235} +_mmlu_weights = {'lukaemon_mmlu_' + k : v for k,v in _mmlu_weights.items()} +mmlu_summary_groups.append({'name': 'mmlu', 'subsets': _mmlu_all}) +mmlu_summary_groups.append({'name': 'mmlu-weighted', 'subsets': 
_mmlu_all, 'weights': _mmlu_weights}) diff --git a/docs/en/_templates/autosummary/class.rst b/docs/en/_templates/autosummary/class.rst new file mode 100644 index 00000000..4c3a7a9a --- /dev/null +++ b/docs/en/_templates/autosummary/class.rst @@ -0,0 +1,13 @@ +.. role:: hidden + :class: hidden-section +.. currentmodule:: {{ module }} + + +{{ name | underline}} + +.. autoclass:: {{ name }} + :members: + +.. + autogenerated from _templates/autosummary/class.rst + note it does not have :inherited-members: diff --git a/docs/en/docutils.conf b/docs/en/docutils.conf new file mode 100644 index 00000000..0c00c846 --- /dev/null +++ b/docs/en/docutils.conf @@ -0,0 +1,2 @@ +[html writers] +table_style: colwidths-auto diff --git a/docs/en/tools.md b/docs/en/tools.md new file mode 100644 index 00000000..b806c842 --- /dev/null +++ b/docs/en/tools.md @@ -0,0 +1 @@ +# Useful Tools diff --git a/docs/en/user_guides/metrics.md b/docs/en/user_guides/metrics.md new file mode 100644 index 00000000..9eb8bc83 --- /dev/null +++ b/docs/en/user_guides/metrics.md @@ -0,0 +1 @@ +# Metric Calculation diff --git a/docs/zh_cn/Makefile b/docs/zh_cn/Makefile new file mode 100644 index 00000000..d4bb2cbb --- /dev/null +++ b/docs/zh_cn/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/opencompass/openicl/icl_retriever/__init__.py b/opencompass/openicl/icl_retriever/__init__.py new file mode 100644 index 00000000..739620b8 --- /dev/null +++ b/opencompass/openicl/icl_retriever/__init__.py @@ -0,0 +1,9 @@ +from .icl_base_retriever import BaseRetriever +from .icl_bm25_retriever import BM25Retriever +from .icl_dpp_retriever import DPPRetriever +from .icl_fix_k_retriever import FixKRetriever +from .icl_mdl_retriever import MDLRetriever +from .icl_random_retriever import RandomRetriever +from .icl_topk_retriever import TopkRetriever +from .icl_votek_retriever import VotekRetriever +from .icl_zero_retriever import ZeroRetriever diff --git a/requirements/runtime.txt b/requirements/runtime.txt new file mode 100644 index 00000000..77b80bbe --- /dev/null +++ b/requirements/runtime.txt @@ -0,0 +1,24 @@ +accelerate>=0.19.0 +datasets>=2.12.0 +evaluate>=0.3.0 +faiss_gpu==1.7.2 +nltk==3.8 +numpy==1.23.4 +openai==0.27.1 +rank_bm25==0.2.2 +requests==2.28.1 +scikit_learn==1.2.1 +sentence_transformers==2.2.2 +torch>=1.13.1 +tqdm==4.64.1 +transformers>=4.29.1 +mmengine +jieba +pandas<2.0.0 +cpm_kernels +tokenizers>=0.13.3 +tabulate +fairscale +colossalai +boto3 +tiktoken \ No newline at end of file