Mirror of https://github.com/open-compass/opencompass.git (last synced 2025-05-30 16:03:24 +08:00).

* fix pip version * fix pip version * update (#1522) Co-authored-by: zhulin1 <zhulin1@pjlab.org.cn> * [Feature] Update Models (#1518) * Update Models * Update * Update humanevalx * Update * Update * [Feature] Dataset prompts update for ARC, BoolQ, Race (#1527) add judgerbench and reorg sub add judgerbench and reorg subeval add judgerbench and reorg subeval * add judgerbench and reorg subeval * add judgerbench and reorg subeval * add judgerbench and reorg subeval * add judgerbench and reorg subeval --------- Co-authored-by: zhulinJulia24 <145004780+zhulinJulia24@users.noreply.github.com> Co-authored-by: zhulin1 <zhulin1@pjlab.org.cn> Co-authored-by: Songyang Zhang <tonysy@users.noreply.github.com> Co-authored-by: Linchen Xiao <xxllcc1993@gmail.com>
28 lines
1.6 KiB
Python
28 lines
1.6 KiB
Python
# flake8: noqa: F401, F403
# Package-level re-exports: every name below is imported from a sibling module
# so that it is also importable from this package directly. None of the names
# are used inside this file, which is why the unused-import checks (F401/F403)
# are suppressed on each line.
from .alignbench import AlignmentBenchDataset  # noqa: F401, F403
from .alignbench import alignbench_postprocess  # noqa: F401, F403
from .alpacaeval import AlpacaEvalDataset  # noqa: F401, F403
from .alpacaeval import alpacaeval_postprocess  # noqa: F401, F403
from .arena_hard import ArenaHardDataset  # noqa: F401, F403
from .arena_hard import arenahard_postprocess  # noqa: F401, F403
from .compassbench import CompassBenchDataset  # noqa: F401, F403
from .compassbench_checklist import \
    CompassBenchCheklistDataset  # noqa: F401, F403
from .compassbench_control_length_bias import \
    CompassBenchControlLengthBiasDataset  # noqa: F401, F403
from .corev2 import Corev2Dataset  # noqa: F401, F403
from .creationbench import CreationBenchDataset  # noqa: F401, F403
from .flames import FlamesDataset  # noqa: F401, F403
from .fofo import FofoDataset, fofo_postprocess  # noqa: F401, F403
from .followbench import FollowBenchDataset  # noqa: F401, F403
from .followbench import followbench_postprocess  # noqa: F401, F403
from .judgerbench import JudgerBenchDataset  # noqa: F401, F403
from .judgerbench import JudgerBenchEvaluator  # noqa: F401, F403
from .mtbench import MTBenchDataset, mtbench_postprocess  # noqa: F401, F403
from .mtbench101 import MTBench101Dataset  # noqa: F401, F403
from .mtbench101 import mtbench101_postprocess  # noqa: F401, F403
from .multiround import MultiroundDataset  # noqa: F401, F403
from .subjective_cmp import SubjectiveCmpDataset  # noqa: F401, F403
from .wildbench import WildBenchDataset  # noqa: F401, F403
from .wildbench import wildbench_postprocess  # noqa: F401, F403