diff --git a/opencompass/configs/datasets/SciKnowEval/SciKnowEval_gen_ebe47d.py b/opencompass/configs/datasets/SciKnowEval/SciKnowEval_gen_ebe47d.py index 703d1540..50c42df6 100644 --- a/opencompass/configs/datasets/SciKnowEval/SciKnowEval_gen_ebe47d.py +++ b/opencompass/configs/datasets/SciKnowEval/SciKnowEval_gen_ebe47d.py @@ -89,4 +89,5 @@ sciknoweval_dataset_physics = dict( eval_cfg=eval_cfg, ) + sciknoweval_datasets = [sciknoweval_dataset_biology, sciknoweval_dataset_chemistry, sciknoweval_dataset_physics, sciknoweval_dataset_material] diff --git a/opencompass/configs/datasets/SciKnowEval/SciKnowEval_llmjudge_gen_ebe47d.py b/opencompass/configs/datasets/SciKnowEval/SciKnowEval_llmjudge_gen_ebe47d.py index a681efe9..a3a87fa7 100644 --- a/opencompass/configs/datasets/SciKnowEval/SciKnowEval_llmjudge_gen_ebe47d.py +++ b/opencompass/configs/datasets/SciKnowEval/SciKnowEval_llmjudge_gen_ebe47d.py @@ -1,4 +1,4 @@ -from opencompass.datasets import SciKnowEvalDataset, SciKnowEvalEvaluator +from opencompass.datasets import SciKnowEvalDataset from opencompass.datasets import generic_llmjudge_postprocess from opencompass.openicl.icl_inferencer import GenInferencer from opencompass.openicl.icl_prompt_template import PromptTemplate @@ -6,7 +6,6 @@ from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.evaluator import GenericLLMEvaluator ZERO_SHOT_PROMPT = '{q4}' - GRADER_TEMPLATE = """ Please as a grading expert, judge whether the final answers given by the candidates below are consistent with the standard answers, that is, whether the candidates answered correctly. diff --git a/opencompass/datasets/__init__.py b/opencompass/datasets/__init__.py index b8cc5095..dcdfc7e6 100644 --- a/opencompass/datasets/__init__.py +++ b/opencompass/datasets/__init__.py @@ -130,7 +130,8 @@ from .ruler import * # noqa: F401, F403 from .safety import * # noqa: F401, F403 from .scibench import ScibenchDataset, scibench_postprocess # noqa: F401, F403 from .scicode import * # noqa: F401, F403 -from .SciKnowEval import * # noqa: F401, F403 +from .SciKnowEval import SciKnowEvalDataset # noqa: F401, F403 +from .SciKnowEval import SciKnowEvalEvaluator # noqa: F401, F403 from .simpleqa import * # noqa: F401, F403 from .siqa import * # noqa: F401, F403 from .smolinstruct import * # noqa: F401, F403