diff --git a/opencompass/configs/datasets/livemathbench/livemathbench_gen_9befbf.py b/opencompass/configs/datasets/livemathbench/livemathbench_gen_9befbf.py index 3748c022..454e7d3e 100644 --- a/opencompass/configs/datasets/livemathbench/livemathbench_gen_9befbf.py +++ b/opencompass/configs/datasets/livemathbench/livemathbench_gen_9befbf.py @@ -48,4 +48,4 @@ livemathbench_dataset = dict( ) ) ) -livemathbench_datasets = [livemathbench_dataset] \ No newline at end of file +livemathbench_datasets = [livemathbench_dataset] diff --git a/opencompass/configs/datasets/livemathbench/livemathbench_gen_f1c095.py b/opencompass/configs/datasets/livemathbench/livemathbench_gen_f1c095.py deleted file mode 100644 index 56161095..00000000 --- a/opencompass/configs/datasets/livemathbench/livemathbench_gen_f1c095.py +++ /dev/null @@ -1,49 +0,0 @@ -from opencompass.openicl.icl_prompt_template import PromptTemplate -from opencompass.openicl.icl_retriever import ZeroRetriever -from opencompass.openicl.icl_inferencer import GenInferencer - -from opencompass.datasets.livemathbench import LiveMathBenchDataset, LiveMathBenchEvaluator - - -livemathbench_reader_cfg = dict( - input_columns=['prompt'], - output_column='answer' -) - -livemathbench_infer_cfg = dict( - prompt_template=dict( - type=PromptTemplate, - template=dict( - round=[ - dict(role='HUMAN', prompt='{prompt}'), - ] - ) - ), - retriever=dict(type=ZeroRetriever), - inferencer=dict( - type=GenInferencer, - max_out_len=8192, - temperature=1.0 - ) -) - -livemathbench_eval_cfg = dict( - evaluator=dict( - type=LiveMathBenchEvaluator, - model_name='Qwen/Qwen2.5-72B-Instruct', - url=['http://172.30.40.154:23333/v1/'] #'https://api.openai.com/v1/' - ) -) - -livemathbench_datasets = [ - dict( - type=LiveMathBenchDataset, - abbr='LiveMathBench-k1-n1', - path='opencompass/LiveMathBench202412', - k=1, # K@Pass - n=1, # Run times - reader_cfg=livemathbench_reader_cfg, - infer_cfg=livemathbench_infer_cfg, - eval_cfg=livemathbench_eval_cfg - ) -] \ No newline at end of file diff --git a/opencompass/configs/datasets/livemathbench/livemathbench_greedy_gen.py b/opencompass/configs/datasets/livemathbench/livemathbench_greedy_gen.py index d311eeaf..c1d72d15 100644 --- a/opencompass/configs/datasets/livemathbench/livemathbench_greedy_gen.py +++ b/opencompass/configs/datasets/livemathbench/livemathbench_greedy_gen.py @@ -1,4 +1,4 @@ from mmengine.config import read_base with read_base(): - from .livemathbench_greedy_gen_efb20d import livemathbench_datasets # noqa: F401, F403 \ No newline at end of file + from .livemathbench_greedy_gen_9befbf import livemathbench_datasets # noqa: F401, F403 \ No newline at end of file diff --git a/opencompass/configs/datasets/livemathbench/livemathbench_hard_gen_353ae7.py b/opencompass/configs/datasets/livemathbench/livemathbench_hard_gen_353ae7.py new file mode 100644 index 00000000..e932d3c3 --- /dev/null +++ b/opencompass/configs/datasets/livemathbench/livemathbench_hard_gen_353ae7.py @@ -0,0 +1,50 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer + +from opencompass.datasets.livemathbench import LiveMathBenchDataset, LiveMathBenchEvaluator + + +livemathbench_dataset = dict( + type=LiveMathBenchDataset, + path='', + k=16, + replication=3, + dataset_splits=['hard'], + dataset_languages=['cn', 'en'], + cot=True, + version='202412', + abbr='LiveMathBench-v202412-Hard', + reader_cfg=dict( + input_columns=['prompt'], + output_column='answer' + ), + infer_cfg=dict( + prompt_template=dict( + type=PromptTemplate, + template=dict( + round=[ + dict(role='HUMAN', prompt='{prompt}'), + ] + ) + ), + retriever=dict(type=ZeroRetriever), + inferencer=dict( + type=GenInferencer + ), + ), + eval_cfg=dict( + evaluator=dict( + type=LiveMathBenchEvaluator, + model_name='', + url=[], + use_extract_model=False, + extract_url=[], + extract_model_name='', + k=[4, 8, 16], + replication=3, + thresholds=[0.0, 0.25, 0.5, 0.75, 1.0] + ) + ) +) +livemathbench_datasets = [livemathbench_dataset] \ No newline at end of file diff --git a/opencompass/configs/datasets/livemathbench/livemathbench_hard_greedy_gen_353ae7.py b/opencompass/configs/datasets/livemathbench/livemathbench_hard_greedy_gen_353ae7.py new file mode 100644 index 00000000..830e55af --- /dev/null +++ b/opencompass/configs/datasets/livemathbench/livemathbench_hard_greedy_gen_353ae7.py @@ -0,0 +1,50 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer + +from opencompass.datasets.livemathbench import LiveMathBenchDataset, LiveMathBenchEvaluator + + +livemathbench_dataset = dict( + type=LiveMathBenchDataset, + path='', + k=1, + replication=1, + dataset_splits=['hard'], + dataset_languages=['cn', 'en'], + cot=True, + version='202412', + abbr='LiveMathBench-v202412-Hard', + reader_cfg=dict( + input_columns=['prompt'], + output_column='answer' + ), + infer_cfg=dict( + prompt_template=dict( + type=PromptTemplate, + template=dict( + round=[ + dict(role='HUMAN', prompt='{prompt}'), + ] + ) + ), + retriever=dict(type=ZeroRetriever), + inferencer=dict( + type=GenInferencer + ), + ), + eval_cfg=dict( + evaluator=dict( + type=LiveMathBenchEvaluator, + model_name='', + url=[], + use_extract_model=False, + extract_url=[], + extract_model_name='', + k=[1], + replication=1, + thresholds=[0.0] + ) + ) +) +livemathbench_datasets = [livemathbench_dataset] \ No newline at end of file diff --git a/opencompass/datasets/livemathbench/livemathbench.py b/opencompass/datasets/livemathbench/livemathbench.py index d2b4b93b..13abf3aa 100644 --- a/opencompass/datasets/livemathbench/livemathbench.py +++ b/opencompass/datasets/livemathbench/livemathbench.py @@ -48,6 +48,7 @@ class LiveMathBenchDataset(BaseDataset): if path != '': path = get_data_path(path) path = os.path.join(path, version) + for split, language in product(dataset_splits, dataset_languages): dataset_info[f'{split}_{language}'] = { 'single-choice': 0, @@ -64,7 +65,6 @@ class LiveMathBenchDataset(BaseDataset): if path != '': file_path = os.path.join(path, f'{split}_{language}.jsonl') - if not os.path.exists(file_path): raise FileNotFoundError( f'File {file_path} does not exist, please check the ' diff --git a/opencompass/models/turbomind_with_tf_above_v4_33.py b/opencompass/models/turbomind_with_tf_above_v4_33.py index 88b605f9..7138974d 100644 --- a/opencompass/models/turbomind_with_tf_above_v4_33.py +++ b/opencompass/models/turbomind_with_tf_above_v4_33.py @@ -164,8 +164,6 @@ class TurboMindModelwithChatTemplate(BaseModel): self.logger.info('Generation Config of LMdeploy: ') self.logger.info(gen_config) - - results = [] outputs = self.pipe(messages, gen_config=gen_config, do_preprocess=False) for output in outputs: