Register the HLE dataset in dataset-index.yml, switch hle_gen.py to the
hle_llmverify_gen_6ff468 config, and rename hle_llmjudge_gen_6ff468.py to
hle_llmverify_gen_6ff468.py.

Review notes:
  * The new index entry is keyed "hle"; it was previously keyed "hellaswag",
    duplicating the key of the HellaSwag entry that follows it.
  * Indentation inside the hunks was reconstructed from a whitespace-collapsed
    copy of this patch; confirm it against the target files before applying.
  * The post-image blob id on the first "index" line predates the key fix.

diff --git a/dataset-index.yml b/dataset-index.yml
index 9fbde8bd..c764c369 100644
--- a/dataset-index.yml
+++ b/dataset-index.yml
@@ -399,6 +399,11 @@
     category: Math
     paper: https://proceedings.mlr.press/v202/gao23f/gao23f.pdf
     configpath: opencompass/configs/datasets/gsm_hard
+- hle:
+    name: HLE
+    category: Reasoning
+    paper: https://lastexam.ai/paper
+    configpath: opencompass/configs/datasets/HLE
 - hellaswag:
     name: HellaSwag
     category: Reasoning
diff --git a/opencompass/configs/datasets/HLE/hle_gen.py b/opencompass/configs/datasets/HLE/hle_gen.py
index a4ff86b4..598f1dde 100644
--- a/opencompass/configs/datasets/HLE/hle_gen.py
+++ b/opencompass/configs/datasets/HLE/hle_gen.py
@@ -1,4 +1,5 @@
 from mmengine.config import read_base
 
 with read_base():
-    from .hle_llmjudge_gen_63a000 import hle_datasets  # noqa: F401, F403
+    # Use an LLM as the judge by default.
+    from .hle_llmverify_gen_6ff468 import hle_datasets  # noqa: F401, F403
diff --git a/opencompass/configs/datasets/HLE/hle_llmjudge_gen_6ff468.py b/opencompass/configs/datasets/HLE/hle_llmverify_gen_6ff468.py
similarity index 100%
rename from opencompass/configs/datasets/HLE/hle_llmjudge_gen_6ff468.py
rename to opencompass/configs/datasets/HLE/hle_llmverify_gen_6ff468.py