mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00

Added MaritimeBench dataset, including dataset metadata, configuration files, data processing logic, and a text post-processing function. This dataset is designed to evaluate AI models' domain knowledge and reasoning ability in the maritime field.
156 lines
6.9 KiB
Python
156 lines
6.9 KiB
Python
from .advglue import * # noqa: F401, F403
|
|
from .afqmcd import * # noqa: F401, F403
|
|
from .agieval import * # noqa: F401, F403
|
|
from .aime2024 import * # noqa: F401, F403
|
|
from .anli import AnliDataset # noqa: F401, F403
|
|
from .anthropics_evals import * # noqa: F401, F403
|
|
from .apps import * # noqa: F401, F403
|
|
from .arc import * # noqa: F401, F403
|
|
from .arc_prize_public_evaluation import * # noqa: F401, F403
|
|
from .ax import * # noqa: F401, F403
|
|
from .babilong import * # noqa: F401, F403
|
|
from .bbeh import * # noqa: F401, F403
|
|
from .bbh import * # noqa: F401, F403
|
|
from .bigcodebench import * # noqa: F401, F403
|
|
from .boolq import * # noqa: F401, F403
|
|
from .bustum import * # noqa: F401, F403
|
|
from .c3 import * # noqa: F401, F403
|
|
from .calm import * # noqa: F401, F403
|
|
from .cb import * # noqa: F401, F403
|
|
from .ceval import * # noqa: F401, F403
|
|
from .charm import * # noqa: F401, F403
|
|
from .chembench import * # noqa: F401, F403
|
|
from .chid import * # noqa: F401, F403
|
|
from .chinese_simpleqa import * # noqa: F401, F403
|
|
from .cibench import * # noqa: F401, F403
|
|
from .circular import * # noqa: F401, F403
|
|
from .civilcomments import * # noqa: F401, F403
|
|
from .clozeTest_maxmin import * # noqa: F401, F403
|
|
from .cluewsc import * # noqa: F401, F403
|
|
from .cmb import * # noqa: F401, F403
|
|
from .cmmlu import * # noqa: F401, F403
|
|
from .cmnli import * # noqa: F401, F403
|
|
from .cmo_fib import * # noqa: F401, F403
|
|
from .cmrc import * # noqa: F401, F403
|
|
from .commonsenseqa import * # noqa: F401, F403
|
|
from .commonsenseqa_cn import * # noqa: F401, F403
|
|
from .copa import * # noqa: F401, F403
|
|
from .crowspairs import * # noqa: F401, F403
|
|
from .crowspairs_cn import * # noqa: F401, F403
|
|
from .csl import * # noqa: F401, F403
|
|
from .custom import * # noqa: F401, F403
|
|
from .cvalues import * # noqa: F401, F403
|
|
from .dingo import * # noqa: F401, F403
|
|
from .drcd import * # noqa: F401, F403
|
|
from .drop import * # noqa: F401, F403
|
|
from .drop_simple_eval import * # noqa: F401, F403
|
|
from .ds1000 import * # noqa: F401, F403
|
|
from .ds1000_interpreter import * # noqa: F401, F403
|
|
from .eprstmt import * # noqa: F401, F403
|
|
from .FinanceIQ import * # noqa: F401, F403
|
|
from .flores import * # noqa: F401, F403
|
|
from .game24 import * # noqa: F401, F403
|
|
from .gaokao_math import * # noqa: F401, F403
|
|
from .GaokaoBench import * # noqa: F401, F403
|
|
from .generic import * # noqa: F401, F403
|
|
from .govrepcrs import * # noqa: F401, F403
|
|
from .gpqa import * # noqa: F401, F403
|
|
from .gsm8k import * # noqa: F401, F403
|
|
from .gsm_hard import * # noqa: F401, F403
|
|
from .hellaswag import * # noqa: F401, F403
|
|
from .hle import * # noqa: F401, F403
|
|
from .huggingface import * # noqa: F401, F403
|
|
from .humaneval import * # noqa: F401, F403
|
|
from .humaneval_multi import * # noqa: F401, F403
|
|
from .humanevalx import * # noqa: F401, F403
|
|
from .hungarian_math import * # noqa: F401, F403
|
|
from .IFEval.ifeval import IFEvalDataset, IFEvaluator # noqa: F401, F403
|
|
from .inference_ppl import InferencePPLDataset # noqa: F401, F403
|
|
from .infinitebench import * # noqa: F401, F403
|
|
from .iwslt2017 import * # noqa: F401, F403
|
|
from .jigsawmultilingual import * # noqa: F401, F403
|
|
from .jsonl import JsonlDataset # noqa: F401, F403
|
|
from .kaoshi import KaoshiDataset, KaoshiEvaluator # noqa: F401, F403
|
|
from .korbench import * # noqa: F401, F403
|
|
from .lambada import * # noqa: F401, F403
|
|
from .lawbench import * # noqa: F401, F403
|
|
from .LCBench import * # noqa: F401, F403
|
|
from .lcsts import * # noqa: F401, F403
|
|
from .leval import * # noqa: F401, F403
|
|
from .livecodebench import * # noqa: F401, F403
|
|
from .livemathbench import * # noqa: F401, F403
|
|
from .livereasonbench import * # noqa: F401, F403
|
|
from .livestembench import * # noqa: F401, F403
|
|
from .llm_compression import LLMCompressionDataset # noqa: F401, F403
|
|
from .longbench import * # noqa: F401, F403
|
|
from .longbenchv2 import * # noqa: F401, F403
|
|
from .lveval import * # noqa: F401, F403
|
|
from .maritime_bench import * # noqa: F401, F403
|
|
from .mastermath2024v1 import * # noqa: F401, F403
|
|
from .math import * # noqa: F401, F403
|
|
from .math401 import * # noqa: F401, F403
|
|
from .math_intern import * # noqa: F401, F403
|
|
from .mathbench import * # noqa: F401, F403
|
|
from .mbpp import * # noqa: F401, F403
|
|
from .medbench import * # noqa: F401, F403
|
|
from .MedXpertQA import * # noqa: F401, F403
|
|
from .mgsm import * # noqa: F401, F403
|
|
from .mmlu import * # noqa: F401, F403
|
|
from .mmlu_cf import * # noqa: F401, F403
|
|
from .mmlu_pro import * # noqa: F401, F403
|
|
from .MMLUArabic import * # noqa: F401, F403
|
|
from .mmmlu import * # noqa: F401, F403
|
|
from .multipl_e import * # noqa: F401, F403
|
|
from .multirc import * # noqa: F401, F403
|
|
from .musr import * # noqa: F401, F403
|
|
from .narrativeqa import * # noqa: F401, F403
|
|
from .natural_question import * # noqa: F401, F403
|
|
from .natural_question_cn import * # noqa: F401, F403
|
|
from .NPHardEval import * # noqa: F401, F403
|
|
from .obqa import * # noqa: F401, F403
|
|
from .olymmath import * # noqa: F401, F403
|
|
from .OlympiadBench import * # noqa: F401, F403
|
|
from .OpenFinData import * # noqa: F401, F403
|
|
from .piqa import * # noqa: F401, F403
|
|
from .py150 import * # noqa: F401, F403
|
|
from .qasper import * # noqa: F401, F403
|
|
from .qaspercut import * # noqa: F401, F403
|
|
from .QuALITY import * # noqa: F401, F403
|
|
from .race import * # noqa: F401, F403
|
|
from .realtoxicprompts import * # noqa: F401, F403
|
|
from .reasonbench import ReasonBenchDataset # noqa: F401, F403
|
|
from .record import * # noqa: F401, F403
|
|
from .ruler import * # noqa: F401, F403
|
|
from .safety import * # noqa: F401, F403
|
|
from .scibench import ScibenchDataset, scibench_postprocess # noqa: F401, F403
|
|
from .scicode import * # noqa: F401, F403
|
|
from .simpleqa import * # noqa: F401, F403
|
|
from .siqa import * # noqa: F401, F403
|
|
from .squad20 import SQuAD20Dataset, SQuAD20Evaluator # noqa: F401, F403
|
|
from .storycloze import * # noqa: F401, F403
|
|
from .strategyqa import * # noqa: F401, F403
|
|
from .subjective import * # noqa: F401, F403
|
|
from .summedits import * # noqa: F401, F403
|
|
from .summscreen import * # noqa: F401, F403
|
|
from .supergpqa import * # noqa: F401, F403
|
|
from .svamp import * # noqa: F401, F403
|
|
from .tabmwp import * # noqa: F401, F403
|
|
from .taco import * # noqa: F401, F403
|
|
from .teval import * # noqa: F401, F403
|
|
from .TheoremQA import * # noqa: F401, F403
|
|
from .tnews import * # noqa: F401, F403
|
|
from .triviaqa import * # noqa: F401, F403
|
|
from .triviaqarc import * # noqa: F401, F403
|
|
from .truthfulqa import * # noqa: F401, F403
|
|
from .tydiqa import * # noqa: F401, F403
|
|
from .wic import * # noqa: F401, F403
|
|
from .wikibench import * # noqa: F401, F403
|
|
from .winograd import * # noqa: F401, F403
|
|
from .winogrande import * # noqa: F401, F403
|
|
from .wnli import wnliDataset # noqa: F401, F403
|
|
from .wsc import * # noqa: F401, F403
|
|
from .xcopa import * # noqa: F401, F403
|
|
from .xiezhi import XiezhiDataset, XiezhiRetriever # noqa: F401, F403
|
|
from .xlsum import * # noqa: F401, F403
|
|
from .xsum import * # noqa: F401, F403
|