From 5a2462a26faf56c0990513309afb7676aa50a8d8 Mon Sep 17 00:00:00 2001
From: liushz
Date: Mon, 3 Mar 2025 10:52:30 +0000
Subject: [PATCH] Add HLE dataset

---
 dataset-index.yml                                            | 5 +++++
 opencompass/configs/datasets/HLE/hle_gen.py                  | 3 ++-
 ...le_llmjudge_gen_6ff468.py => hle_llmverify_gen_6ff468.py} | 0
 3 files changed, 7 insertions(+), 1 deletion(-)
 rename opencompass/configs/datasets/HLE/{hle_llmjudge_gen_6ff468.py => hle_llmverify_gen_6ff468.py} (100%)

diff --git a/dataset-index.yml b/dataset-index.yml
index 9fbde8bd..c764c369 100644
--- a/dataset-index.yml
+++ b/dataset-index.yml
@@ -399,6 +399,11 @@
     category: Math
     paper: https://proceedings.mlr.press/v202/gao23f/gao23f.pdf
     configpath: opencompass/configs/datasets/gsm_hard
+- hle:
+    name: HLE
+    category: Reasoning
+    paper: https://lastexam.ai/paper
+    configpath: opencompass/configs/datasets/HLE
 - hellaswag:
     name: HellaSwag
     category: Reasoning
diff --git a/opencompass/configs/datasets/HLE/hle_gen.py b/opencompass/configs/datasets/HLE/hle_gen.py
index a4ff86b4..598f1dde 100644
--- a/opencompass/configs/datasets/HLE/hle_gen.py
+++ b/opencompass/configs/datasets/HLE/hle_gen.py
@@ -1,4 +1,5 @@
 from mmengine.config import read_base
 
 with read_base():
-    from .hle_llmjudge_gen_63a000 import hle_datasets  # noqa: F401, F403
+    # Use the LLM-as-a-judge configuration by default
+    from .hle_llmverify_gen_6ff468 import hle_datasets  # noqa: F401, F403
diff --git a/opencompass/configs/datasets/HLE/hle_llmjudge_gen_6ff468.py b/opencompass/configs/datasets/HLE/hle_llmverify_gen_6ff468.py
similarity index 100%
rename from opencompass/configs/datasets/HLE/hle_llmjudge_gen_6ff468.py
rename to opencompass/configs/datasets/HLE/hle_llmverify_gen_6ff468.py