diff --git a/opencompass/datasets/TheoremQA/main.py b/opencompass/datasets/TheoremQA/main.py index 7e06792e..4c145896 100644 --- a/opencompass/datasets/TheoremQA/main.py +++ b/opencompass/datasets/TheoremQA/main.py @@ -65,7 +65,7 @@ class TheoremQAEvaluatorV3(BaseEvaluator): { # "question": question, # "solution": output, - "correct": groundtruth, + # "correct": groundtruth, "pred": answer, "is_correct": is_correct, } diff --git a/opencompass/openicl/icl_evaluator/icl_plugin_evaluator.py b/opencompass/openicl/icl_evaluator/icl_plugin_evaluator.py index 61fdbd23..b0b73188 100644 --- a/opencompass/openicl/icl_evaluator/icl_plugin_evaluator.py +++ b/opencompass/openicl/icl_evaluator/icl_plugin_evaluator.py @@ -2,8 +2,12 @@ import json +from opencompass.openicl.icl_evaluator import BaseEvaluator +from opencompass.registry import ICL_EVALUATORS -class TEvalEvaluator: + +@ICL_EVALUATORS.register_module() +class TEvalEvaluator(BaseEvaluator): """This module contains the following evaluators for evaluating the capabilities of the various dimensions of the LLM. diff --git a/requirements/runtime.txt b/requirements/runtime.txt index 348df85d..6bd5e9a9 100644 --- a/requirements/runtime.txt +++ b/requirements/runtime.txt @@ -37,7 +37,7 @@ rouge_score sacrebleu scikit_learn==1.5.0 seaborn -sentence_transformers==2.2.2 +sentence_transformers tabulate tiktoken timeout_decorator