diff --git a/opencompass/datasets/LCBench.py b/opencompass/datasets/LCBench.py index cd747788..9af4cadc 100644 --- a/opencompass/datasets/LCBench.py +++ b/opencompass/datasets/LCBench.py @@ -22,7 +22,10 @@ from .base import BaseDataset class LCDataset(BaseDataset): @staticmethod - def load(path: str, num_repeats: int = 1, difficulty='ALL'): + def load(path: str, + num_repeats: int = 1, + difficulty='ALL', + local_mode=False): """Load LC dataset for pass k mode. Note that you can use num_repeats > 1 when your model does not support @@ -38,7 +41,7 @@ class LCDataset(BaseDataset): num_repeats(int): Number of repetition for this dataset to get multiple responses in special cases. """ - path = get_data_path(path, local_mode=True) + path = get_data_path(path, local_mode=local_mode) def processing_test(example): example['test_case'] = example['test_list'] diff --git a/opencompass/datasets/bigcodebench/bigcodebench.py b/opencompass/datasets/bigcodebench/bigcodebench.py index 8d8e06f3..f1109b1d 100644 --- a/opencompass/datasets/bigcodebench/bigcodebench.py +++ b/opencompass/datasets/bigcodebench/bigcodebench.py @@ -73,6 +73,8 @@ class BigCodeBenchEvaluator(BaseEvaluator): eval_type='instruct', remote_execute_api='https://bigcode-bigcodebench-evaluator.hf.space/', # noqa dataset_version: str = 'full', + local_mode: bool = False, + path: str = 'opencompass/bigcodebench', pass_k: str = '1,5,10', parallel: int = -1, min_time_limit: float = 1, @@ -84,7 +86,9 @@ class BigCodeBenchEvaluator(BaseEvaluator): super().__init__() self.dataset = BigCodeBenchDataset.load( release_version=release_version, - dataset_version=dataset_version)['test'] + dataset_version=dataset_version, + local_mode=local_mode, + path=path)['test'] self.eval_type = eval_type self.remote_execute_api = remote_execute_api