diff --git a/opencompass/configs/datasets/ChemBench/ChemBench_llmjudge_gen_c584cf.py b/opencompass/configs/datasets/ChemBench/ChemBench_llmjudge_gen_c584cf.py index d6fc7e46..86e9739c 100644 --- a/opencompass/configs/datasets/ChemBench/ChemBench_llmjudge_gen_c584cf.py +++ b/opencompass/configs/datasets/ChemBench/ChemBench_llmjudge_gen_c584cf.py @@ -86,7 +86,7 @@ for _name in chembench_all_sets: ), dataset_cfg=dict( type=ChemBenchDataset, - path='/fs-computility/llm/xiaolinchen/opencompass_fork/data/ChemBench4K', + path='opencompass/ChemBench4K', name=_name, reader_cfg=chembench_reader_cfg, ), diff --git a/opencompass/datasets/TheoremQA/utils.py b/opencompass/datasets/TheoremQA/utils.py index ca9c2661..e6a35e4f 100644 --- a/opencompass/datasets/TheoremQA/utils.py +++ b/opencompass/datasets/TheoremQA/utils.py @@ -33,7 +33,12 @@ def extract_theoremqa_answer(pred: str, answer_flag: bool = True): try: with time_limit(1): tmp = str(latex2sympy(pred)) - pred = str(eval(tmp)) + pred = eval(tmp) + if isinstance(pred, tuple): + pred = str(list(pred)) + else: + pred = str(pred) + except Exception: if re.match(r'-?[\d\.]+\s\D+$', pred): pred = pred.split(' ')[0] diff --git a/opencompass/datasets/judge/rewardbench.py b/opencompass/datasets/judge/rewardbench.py index 9533ae17..e951dc22 100644 --- a/opencompass/datasets/judge/rewardbench.py +++ b/opencompass/datasets/judge/rewardbench.py @@ -14,6 +14,7 @@ from opencompass.utils import get_data_path from ..base import BaseDataset + @LOAD_DATASET.register_module() class RewardBenchDataset(BaseDataset): diff --git a/opencompass/datasets/smolinstruct.py b/opencompass/datasets/smolinstruct.py index 4589d606..54c58a6b 100644 --- a/opencompass/datasets/smolinstruct.py +++ b/opencompass/datasets/smolinstruct.py @@ -4,6 +4,7 @@ from collections import defaultdict import numpy as np from datasets import Dataset, DatasetDict, load_dataset +from nltk.translate.meteor_score import meteor_score from opencompass.openicl.icl_evaluator.icl_base_evaluator import BaseEvaluator from opencompass.registry import (ICL_EVALUATORS, LOAD_DATASET,