From b50d1632657db830545c17d4a3fbe01dee2f7e57 Mon Sep 17 00:00:00 2001 From: Mo Li <82895469+DseidLi@users.noreply.github.com> Date: Sun, 7 Apr 2024 15:12:56 +0800 Subject: [PATCH] [Fix] Refactor Needlebench Configs for CLI Testing Support (#1020) * add needlebench datasets suffix * fix import * update run.py args for summarizer key and dataset suffix * update utils/run.py --- configs/datasets/needlebench/needlebench.py | 11 ----- .../needlebench_1000k/needlebench.py | 18 --------- .../needlebench_1000k/needlebench_1000k.py | 18 +++++++++ ...y => needlebench_multi_reasoning_1000k.py} | 32 +++++++-------- ...y => needlebench_multi_retrieval_1000k.py} | 8 ++-- ..._single.py => needlebench_single_1000k.py} | 8 ++-- .../needlebench_128k/needlebench.py | 18 --------- .../needlebench_128k/needlebench_128k.py | 18 +++++++++ ...py => needlebench_multi_reasoning_128k.py} | 32 +++++++-------- ...py => needlebench_multi_retrieval_128k.py} | 8 ++-- ...h_single.py => needlebench_single_128k.py} | 8 ++-- .../needlebench_200k/needlebench.py | 18 --------- .../needlebench_200k/needlebench_200k.py | 18 +++++++++ ...py => needlebench_multi_reasoning_200k.py} | 32 +++++++-------- ...py => needlebench_multi_retrieval_200k.py} | 8 ++-- ...h_single.py => needlebench_single_200k.py} | 8 ++-- .../needlebench_32k/needlebench.py | 18 --------- .../needlebench_32k/needlebench_32k.py | 18 +++++++++ ....py => needlebench_multi_reasoning_32k.py} | 32 +++++++-------- ....py => needlebench_multi_retrieval_32k.py} | 8 ++-- ...ch_single.py => needlebench_single_32k.py} | 8 ++-- .../needlebench/needlebench_4k/needlebench.py | 18 --------- .../needlebench_4k/needlebench_4k.py | 18 +++++++++ ...g.py => needlebench_multi_reasoning_4k.py} | 32 +++++++-------- ...l.py => needlebench_multi_retrieval_4k.py} | 8 ++-- ...nch_single.py => needlebench_single_4k.py} | 8 ++-- .../needlebench/needlebench_8k/needlebench.py | 18 --------- .../needlebench_8k/needlebench_8k.py | 18 +++++++++ ...g.py => needlebench_multi_reasoning_8k.py} | 32 +++++++-------- ...l.py => needlebench_multi_retrieval_8k.py} | 8 ++-- ...bench_multi_retrieval_compare_batch_8k.py} | 8 ++-- ...nch_single.py => needlebench_single_8k.py} | 8 ++-- opencompass/utils/run.py | 40 ++++++++++++++----- 33 files changed, 287 insertions(+), 276 deletions(-) delete mode 100644 configs/datasets/needlebench/needlebench.py delete mode 100644 configs/datasets/needlebench/needlebench_1000k/needlebench.py create mode 100644 configs/datasets/needlebench/needlebench_1000k/needlebench_1000k.py rename configs/datasets/needlebench/needlebench_1000k/{needlebench_multi_reasoning.py => needlebench_multi_reasoning_1000k.py} (92%) rename configs/datasets/needlebench/needlebench_1000k/{needlebench_multi_retrieval.py => needlebench_multi_retrieval_1000k.py} (95%) rename configs/datasets/needlebench/needlebench_1000k/{needlebench_single.py => needlebench_single_1000k.py} (95%) delete mode 100644 configs/datasets/needlebench/needlebench_128k/needlebench.py create mode 100644 configs/datasets/needlebench/needlebench_128k/needlebench_128k.py rename configs/datasets/needlebench/needlebench_128k/{needlebench_multi_reasoning.py => needlebench_multi_reasoning_128k.py} (92%) rename configs/datasets/needlebench/needlebench_128k/{needlebench_multi_retrieval.py => needlebench_multi_retrieval_128k.py} (95%) rename configs/datasets/needlebench/needlebench_128k/{needlebench_single.py => needlebench_single_128k.py} (95%) delete mode 100644 configs/datasets/needlebench/needlebench_200k/needlebench.py create mode 100644 configs/datasets/needlebench/needlebench_200k/needlebench_200k.py rename configs/datasets/needlebench/needlebench_200k/{needlebench_multi_reasoning.py => needlebench_multi_reasoning_200k.py} (92%) rename configs/datasets/needlebench/needlebench_200k/{needlebench_multi_retrieval.py => needlebench_multi_retrieval_200k.py} (95%) rename configs/datasets/needlebench/needlebench_200k/{needlebench_single.py => needlebench_single_200k.py} (96%) delete mode 100644 configs/datasets/needlebench/needlebench_32k/needlebench.py create mode 100644 configs/datasets/needlebench/needlebench_32k/needlebench_32k.py rename configs/datasets/needlebench/needlebench_32k/{needlebench_multi_reasoning.py => needlebench_multi_reasoning_32k.py} (92%) rename configs/datasets/needlebench/needlebench_32k/{needlebench_multi_retrieval.py => needlebench_multi_retrieval_32k.py} (95%) rename configs/datasets/needlebench/needlebench_32k/{needlebench_single.py => needlebench_single_32k.py} (95%) delete mode 100644 configs/datasets/needlebench/needlebench_4k/needlebench.py create mode 100644 configs/datasets/needlebench/needlebench_4k/needlebench_4k.py rename configs/datasets/needlebench/needlebench_4k/{needlebench_multi_reasoning.py => needlebench_multi_reasoning_4k.py} (93%) rename configs/datasets/needlebench/needlebench_4k/{needlebench_multi_retrieval.py => needlebench_multi_retrieval_4k.py} (95%) rename configs/datasets/needlebench/needlebench_4k/{needlebench_single.py => needlebench_single_4k.py} (96%) delete mode 100644 configs/datasets/needlebench/needlebench_8k/needlebench.py create mode 100644 configs/datasets/needlebench/needlebench_8k/needlebench_8k.py rename configs/datasets/needlebench/needlebench_8k/{needlebench_multi_reasoning.py => needlebench_multi_reasoning_8k.py} (93%) rename configs/datasets/needlebench/needlebench_8k/{needlebench_multi_retrieval.py => needlebench_multi_retrieval_8k.py} (95%) rename configs/datasets/needlebench/needlebench_8k/{needlebench_multi_retrieval_compare_batch.py => needlebench_multi_retrieval_compare_batch_8k.py} (96%) rename configs/datasets/needlebench/needlebench_8k/{needlebench_single.py => needlebench_single_8k.py} (96%) diff --git a/configs/datasets/needlebench/needlebench.py b/configs/datasets/needlebench/needlebench.py deleted file mode 100644 index 09b978dd..00000000 --- a/configs/datasets/needlebench/needlebench.py +++ /dev/null @@ -1,11 +0,0 @@ -from mmengine.config import read_base - -with read_base(): - from .needlebench_4k.needlebench import needlebench_datasets as needlebench_datasets_4k - from .needlebench_8k.needlebench import needlebench_datasets as needlebench_datasets_8k - from .needlebench_32k.needlebench import needlebench_datasets as needlebench_datasets_32k - from .needlebench_128k.needlebench import needlebench_datasets as needlebench_datasets_128k - from .needlebench_200k.needlebench import needlebench_datasets as needlebench_datasets_200k - from .needlebench_1000k.needlebench import needlebench_datasets as needlebench_datasets_1000k - -needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), []) diff --git a/configs/datasets/needlebench/needlebench_1000k/needlebench.py b/configs/datasets/needlebench/needlebench_1000k/needlebench.py deleted file mode 100644 index b73abb1f..00000000 --- a/configs/datasets/needlebench/needlebench_1000k/needlebench.py +++ /dev/null @@ -1,18 +0,0 @@ -from mmengine.config import read_base - -with read_base(): - from .needlebench_multi_reasoning import needlebench_datasets_2needle_en as needlebench_multi_2needle_en_datasets - from .needlebench_multi_reasoning import needlebench_datasets_3needle_en as needlebench_multi_3needle_en_datasets - from .needlebench_multi_reasoning import needlebench_datasets_4needle_en as needlebench_multi_4needle_en_datasets - from .needlebench_multi_reasoning import needlebench_datasets_5needle_en as needlebench_multi_5needle_en_datasets - from .needlebench_multi_reasoning import needlebench_datasets_2needle_zh as needlebench_multi_2needle_zh_datasets - from .needlebench_multi_reasoning import needlebench_datasets_3needle_zh as needlebench_multi_3needle_zh_datasets - from .needlebench_multi_reasoning import needlebench_datasets_4needle_zh as needlebench_multi_4needle_zh_datasets - from .needlebench_multi_reasoning import needlebench_datasets_5needle_zh as needlebench_multi_5needle_zh_datasets - - from .needlebench_single import needlebench_datasets_en as needlebench_origin_en_datasets - from .needlebench_single import needlebench_datasets_zh as needlebench_origin_zh_datasets - from .needlebench_multi_retrieval import needlebench_datasets_en as needlebench_parallel_en_datasets - from .needlebench_multi_retrieval import needlebench_datasets_zh as needlebench_parallel_zh_datasets - -needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), []) diff --git a/configs/datasets/needlebench/needlebench_1000k/needlebench_1000k.py b/configs/datasets/needlebench/needlebench_1000k/needlebench_1000k.py new file mode 100644 index 00000000..2e01f539 --- /dev/null +++ b/configs/datasets/needlebench/needlebench_1000k/needlebench_1000k.py @@ -0,0 +1,18 @@ +from mmengine.config import read_base + +with read_base(): + from .needlebench_multi_reasoning_1000k import needlebench_2needle_en_datasets as needlebench_multi_2needle_en_datasets + from .needlebench_multi_reasoning_1000k import needlebench_3needle_en_datasets as needlebench_multi_3needle_en_datasets + from .needlebench_multi_reasoning_1000k import needlebench_4needle_en_datasets as needlebench_multi_4needle_en_datasets + from .needlebench_multi_reasoning_1000k import needlebench_5needle_en_datasets as needlebench_multi_5needle_en_datasets + from .needlebench_multi_reasoning_1000k import needlebench_2needle_zh_datasets as needlebench_multi_2needle_zh_datasets + from .needlebench_multi_reasoning_1000k import needlebench_3needle_zh_datasets as needlebench_multi_3needle_zh_datasets + from .needlebench_multi_reasoning_1000k import needlebench_4needle_zh_datasets as needlebench_multi_4needle_zh_datasets + from .needlebench_multi_reasoning_1000k import needlebench_5needle_zh_datasets as needlebench_multi_5needle_zh_datasets + + from .needlebench_single_1000k import needlebench_en_datasets as needlebench_origin_en_datasets + from .needlebench_single_1000k import needlebench_zh_datasets as needlebench_origin_zh_datasets + from .needlebench_multi_retrieval_1000k import needlebench_en_datasets as needlebench_parallel_en_datasets + from .needlebench_multi_retrieval_1000k import needlebench_zh_datasets as needlebench_parallel_zh_datasets + +needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), []) diff --git a/configs/datasets/needlebench/needlebench_1000k/needlebench_multi_reasoning.py b/configs/datasets/needlebench/needlebench_1000k/needlebench_multi_reasoning_1000k.py similarity index 92% rename from configs/datasets/needlebench/needlebench_1000k/needlebench_multi_reasoning.py rename to configs/datasets/needlebench/needlebench_1000k/needlebench_multi_reasoning_1000k.py index 80402bdd..805b359b 100644 --- a/configs/datasets/needlebench/needlebench_1000k/needlebench_multi_reasoning.py +++ b/configs/datasets/needlebench/needlebench_1000k/needlebench_multi_reasoning_1000k.py @@ -62,7 +62,7 @@ file_list = ['PaulGrahamEssays.jsonl'] needle_file_name = 'multi_needle_reasoning_en.json' diff = 10 num_needles = 2 -needlebench_datasets_2needle_en = [] +needlebench_2needle_en_datasets = [] language = 'English' for original_context_length in context_lengths: @@ -87,10 +87,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_2needle_en.append(dataset_dict) + needlebench_2needle_en_datasets.append(dataset_dict) num_needles = 3 -needlebench_datasets_3needle_en = [] +needlebench_3needle_en_datasets = [] for original_context_length in context_lengths: for depth_percent in depths_list: @@ -114,10 +114,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_3needle_en.append(dataset_dict) + needlebench_3needle_en_datasets.append(dataset_dict) num_needles = 4 -needlebench_datasets_4needle_en = [] +needlebench_4needle_en_datasets = [] for original_context_length in context_lengths: for depth_percent in depths_list: @@ -141,10 +141,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_4needle_en.append(dataset_dict) + needlebench_4needle_en_datasets.append(dataset_dict) num_needles = 5 -needlebench_datasets_5needle_en = [] +needlebench_5needle_en_datasets = [] for original_context_length in context_lengths: for depth_percent in depths_list: @@ -168,7 +168,7 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_5needle_en.append(dataset_dict) + needlebench_5needle_en_datasets.append(dataset_dict) # ----------Chinese Version---------- base_path = './data/needlebench' @@ -177,7 +177,7 @@ file_list = ['zh_finance.jsonl'] needle_file_name = 'multi_needle_reasoning_zh.json' diff = 10 num_needles = 2 -needlebench_datasets_2needle_zh = [] +needlebench_2needle_zh_datasets = [] language = 'Chinese' for original_context_length in context_lengths: @@ -202,10 +202,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_2needle_zh.append(dataset_dict) + needlebench_2needle_zh_datasets.append(dataset_dict) num_needles = 3 -needlebench_datasets_3needle_zh = [] +needlebench_3needle_zh_datasets = [] for original_context_length in context_lengths: for depth_percent in depths_list: @@ -229,10 +229,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_3needle_zh.append(dataset_dict) + needlebench_3needle_zh_datasets.append(dataset_dict) num_needles = 4 -needlebench_datasets_4needle_zh = [] +needlebench_4needle_zh_datasets = [] for original_context_length in context_lengths: for depth_percent in depths_list: @@ -256,10 +256,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_4needle_zh.append(dataset_dict) + needlebench_4needle_zh_datasets.append(dataset_dict) num_needles = 5 -needlebench_datasets_5needle_zh = [] +needlebench_5needle_zh_datasets = [] for original_context_length in context_lengths: for depth_percent in depths_list: @@ -283,4 +283,4 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_5needle_zh.append(dataset_dict) + needlebench_5needle_zh_datasets.append(dataset_dict) diff --git a/configs/datasets/needlebench/needlebench_1000k/needlebench_multi_retrieval.py b/configs/datasets/needlebench/needlebench_1000k/needlebench_multi_retrieval_1000k.py similarity index 95% rename from configs/datasets/needlebench/needlebench_1000k/needlebench_multi_retrieval.py rename to configs/datasets/needlebench/needlebench_1000k/needlebench_multi_retrieval_1000k.py index 993e4f7c..7e1421f5 100644 --- a/configs/datasets/needlebench/needlebench_1000k/needlebench_multi_retrieval.py +++ b/configs/datasets/needlebench/needlebench_1000k/needlebench_multi_retrieval_1000k.py @@ -58,7 +58,7 @@ document_depth_percent_interval_type = "linear" base_path = './data/needlebench' file_list = ['PaulGrahamEssays.jsonl'] -needlebench_datasets_en = [] +needlebench_en_datasets = [] needle_file_name = 'needles.jsonl' depths = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100] @@ -81,10 +81,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_en.append(dataset_dict) + needlebench_en_datasets.append(dataset_dict) file_list = ['zh_finance.jsonl'] -needlebench_datasets_zh = [] +needlebench_zh_datasets = [] for original_context_length in context_lengths: dataset_dict = { @@ -105,4 +105,4 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_zh.append(dataset_dict) + needlebench_zh_datasets.append(dataset_dict) diff --git a/configs/datasets/needlebench/needlebench_1000k/needlebench_single.py b/configs/datasets/needlebench/needlebench_1000k/needlebench_single_1000k.py similarity index 95% rename from configs/datasets/needlebench/needlebench_1000k/needlebench_single.py rename to configs/datasets/needlebench/needlebench_1000k/needlebench_single_1000k.py index 5a41275e..2f998486 100644 --- a/configs/datasets/needlebench/needlebench_1000k/needlebench_single.py +++ b/configs/datasets/needlebench/needlebench_1000k/needlebench_single_1000k.py @@ -57,7 +57,7 @@ depths_list = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100] base_path = './data/needlebench' file_list = ['PaulGrahamEssays.jsonl'] -needlebench_datasets_en = [] +needlebench_en_datasets = [] needle_file_name = 'needles.jsonl' for original_context_length in context_lengths: @@ -80,10 +80,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_en.append(dataset_dict) + needlebench_en_datasets.append(dataset_dict) file_list = ['zh_finance.jsonl'] -needlebench_datasets_zh = [] +needlebench_zh_datasets = [] needle_file_name = 'needles.jsonl' for original_context_length in context_lengths: @@ -106,4 +106,4 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_zh.append(dataset_dict) + needlebench_zh_datasets.append(dataset_dict) diff --git a/configs/datasets/needlebench/needlebench_128k/needlebench.py b/configs/datasets/needlebench/needlebench_128k/needlebench.py deleted file mode 100644 index b73abb1f..00000000 --- a/configs/datasets/needlebench/needlebench_128k/needlebench.py +++ /dev/null @@ -1,18 +0,0 @@ -from mmengine.config import read_base - -with read_base(): - from .needlebench_multi_reasoning import needlebench_datasets_2needle_en as needlebench_multi_2needle_en_datasets - from .needlebench_multi_reasoning import needlebench_datasets_3needle_en as needlebench_multi_3needle_en_datasets - from .needlebench_multi_reasoning import needlebench_datasets_4needle_en as needlebench_multi_4needle_en_datasets - from .needlebench_multi_reasoning import needlebench_datasets_5needle_en as needlebench_multi_5needle_en_datasets - from .needlebench_multi_reasoning import needlebench_datasets_2needle_zh as needlebench_multi_2needle_zh_datasets - from .needlebench_multi_reasoning import needlebench_datasets_3needle_zh as needlebench_multi_3needle_zh_datasets - from .needlebench_multi_reasoning import needlebench_datasets_4needle_zh as needlebench_multi_4needle_zh_datasets - from .needlebench_multi_reasoning import needlebench_datasets_5needle_zh as needlebench_multi_5needle_zh_datasets - - from .needlebench_single import needlebench_datasets_en as needlebench_origin_en_datasets - from .needlebench_single import needlebench_datasets_zh as needlebench_origin_zh_datasets - from .needlebench_multi_retrieval import needlebench_datasets_en as needlebench_parallel_en_datasets - from .needlebench_multi_retrieval import needlebench_datasets_zh as needlebench_parallel_zh_datasets - -needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), []) diff --git a/configs/datasets/needlebench/needlebench_128k/needlebench_128k.py b/configs/datasets/needlebench/needlebench_128k/needlebench_128k.py new file mode 100644 index 00000000..d0dd9af3 --- /dev/null +++ b/configs/datasets/needlebench/needlebench_128k/needlebench_128k.py @@ -0,0 +1,18 @@ +from mmengine.config import read_base + +with read_base(): + from .needlebench_multi_reasoning_128k import needlebench_2needle_en_datasets as needlebench_multi_2needle_en_datasets + from .needlebench_multi_reasoning_128k import needlebench_3needle_en_datasets as needlebench_multi_3needle_en_datasets + from .needlebench_multi_reasoning_128k import needlebench_4needle_en_datasets as needlebench_multi_4needle_en_datasets + from .needlebench_multi_reasoning_128k import needlebench_5needle_en_datasets as needlebench_multi_5needle_en_datasets + from .needlebench_multi_reasoning_128k import needlebench_2needle_zh_datasets as needlebench_multi_2needle_zh_datasets + from .needlebench_multi_reasoning_128k import needlebench_3needle_zh_datasets as needlebench_multi_3needle_zh_datasets + from .needlebench_multi_reasoning_128k import needlebench_4needle_zh_datasets as needlebench_multi_4needle_zh_datasets + from .needlebench_multi_reasoning_128k import needlebench_5needle_zh_datasets as needlebench_multi_5needle_zh_datasets + + from .needlebench_single_128k import needlebench_en_datasets as needlebench_origin_en_datasets + from .needlebench_single_128k import needlebench_zh_datasets as needlebench_origin_zh_datasets + from .needlebench_multi_retrieval_128k import needlebench_en_datasets as needlebench_parallel_en_datasets + from .needlebench_multi_retrieval_128k import needlebench_zh_datasets as needlebench_parallel_zh_datasets + +needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), []) diff --git a/configs/datasets/needlebench/needlebench_128k/needlebench_multi_reasoning.py b/configs/datasets/needlebench/needlebench_128k/needlebench_multi_reasoning_128k.py similarity index 92% rename from configs/datasets/needlebench/needlebench_128k/needlebench_multi_reasoning.py rename to configs/datasets/needlebench/needlebench_128k/needlebench_multi_reasoning_128k.py index 00f399c0..e2ce33d3 100644 --- a/configs/datasets/needlebench/needlebench_128k/needlebench_multi_reasoning.py +++ b/configs/datasets/needlebench/needlebench_128k/needlebench_multi_reasoning_128k.py @@ -64,7 +64,7 @@ file_list = ['PaulGrahamEssays.jsonl'] needle_file_name = 'multi_needle_reasoning_en.json' diff = 10 num_needles = 2 -needlebench_datasets_2needle_en = [] +needlebench_2needle_en_datasets = [] language = 'English' for original_context_length in context_lengths: @@ -89,10 +89,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_2needle_en.append(dataset_dict) + needlebench_2needle_en_datasets.append(dataset_dict) num_needles = 3 -needlebench_datasets_3needle_en = [] +needlebench_3needle_en_datasets = [] for original_context_length in context_lengths: for depth_percent in depths_list: @@ -116,10 +116,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_3needle_en.append(dataset_dict) + needlebench_3needle_en_datasets.append(dataset_dict) num_needles = 4 -needlebench_datasets_4needle_en = [] +needlebench_4needle_en_datasets = [] for original_context_length in context_lengths: for depth_percent in depths_list: @@ -143,10 +143,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_4needle_en.append(dataset_dict) + needlebench_4needle_en_datasets.append(dataset_dict) num_needles = 5 -needlebench_datasets_5needle_en = [] +needlebench_5needle_en_datasets = [] for original_context_length in context_lengths: for depth_percent in depths_list: @@ -170,7 +170,7 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_5needle_en.append(dataset_dict) + needlebench_5needle_en_datasets.append(dataset_dict) # ----------Chinese Version---------- base_path = './data/needlebench' @@ -179,7 +179,7 @@ file_list = ['zh_finance.jsonl'] needle_file_name = 'multi_needle_reasoning_zh.json' diff = 10 num_needles = 2 -needlebench_datasets_2needle_zh = [] +needlebench_2needle_zh_datasets = [] language = 'Chinese' for original_context_length in context_lengths: @@ -204,10 +204,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_2needle_zh.append(dataset_dict) + needlebench_2needle_zh_datasets.append(dataset_dict) num_needles = 3 -needlebench_datasets_3needle_zh = [] +needlebench_3needle_zh_datasets = [] for original_context_length in context_lengths: for depth_percent in depths_list: @@ -231,10 +231,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_3needle_zh.append(dataset_dict) + needlebench_3needle_zh_datasets.append(dataset_dict) num_needles = 4 -needlebench_datasets_4needle_zh = [] +needlebench_4needle_zh_datasets = [] for original_context_length in context_lengths: for depth_percent in depths_list: @@ -258,10 +258,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_4needle_zh.append(dataset_dict) + needlebench_4needle_zh_datasets.append(dataset_dict) num_needles = 5 -needlebench_datasets_5needle_zh = [] +needlebench_5needle_zh_datasets = [] for original_context_length in context_lengths: for depth_percent in depths_list: @@ -285,4 +285,4 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_5needle_zh.append(dataset_dict) + needlebench_5needle_zh_datasets.append(dataset_dict) diff --git a/configs/datasets/needlebench/needlebench_128k/needlebench_multi_retrieval.py b/configs/datasets/needlebench/needlebench_128k/needlebench_multi_retrieval_128k.py similarity index 95% rename from configs/datasets/needlebench/needlebench_128k/needlebench_multi_retrieval.py rename to configs/datasets/needlebench/needlebench_128k/needlebench_multi_retrieval_128k.py index 179e7c9a..aa77293c 100644 --- a/configs/datasets/needlebench/needlebench_128k/needlebench_multi_retrieval.py +++ b/configs/datasets/needlebench/needlebench_128k/needlebench_multi_retrieval_128k.py @@ -58,7 +58,7 @@ document_depth_percent_interval_type = "linear" base_path = './data/needlebench' file_list = ['PaulGrahamEssays.jsonl'] -needlebench_datasets_en = [] +needlebench_en_datasets = [] needle_file_name = 'needles.jsonl' depths = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100] @@ -81,10 +81,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_en.append(dataset_dict) + needlebench_en_datasets.append(dataset_dict) file_list = ['zh_finance.jsonl'] -needlebench_datasets_zh = [] +needlebench_zh_datasets = [] for original_context_length in context_lengths: dataset_dict = { @@ -105,4 +105,4 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_zh.append(dataset_dict) + needlebench_zh_datasets.append(dataset_dict) diff --git a/configs/datasets/needlebench/needlebench_128k/needlebench_single.py b/configs/datasets/needlebench/needlebench_128k/needlebench_single_128k.py similarity index 95% rename from configs/datasets/needlebench/needlebench_128k/needlebench_single.py rename to configs/datasets/needlebench/needlebench_128k/needlebench_single_128k.py index 451a0084..5f7520e2 100644 --- a/configs/datasets/needlebench/needlebench_128k/needlebench_single.py +++ b/configs/datasets/needlebench/needlebench_128k/needlebench_single_128k.py @@ -59,7 +59,7 @@ document_depth_percent_interval_type = "linear" base_path = './data/needlebench' file_list = ['PaulGrahamEssays.jsonl'] -needlebench_datasets_en = [] +needlebench_en_datasets = [] needle_file_name = 'needles.jsonl' for original_context_length in context_lengths: @@ -82,10 +82,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_en.append(dataset_dict) + needlebench_en_datasets.append(dataset_dict) file_list = ['zh_finance.jsonl'] -needlebench_datasets_zh = [] +needlebench_zh_datasets = [] needle_file_name = 'needles.jsonl' for original_context_length in context_lengths: @@ -108,4 +108,4 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_zh.append(dataset_dict) + needlebench_zh_datasets.append(dataset_dict) diff --git a/configs/datasets/needlebench/needlebench_200k/needlebench.py b/configs/datasets/needlebench/needlebench_200k/needlebench.py deleted file mode 100644 index b73abb1f..00000000 --- a/configs/datasets/needlebench/needlebench_200k/needlebench.py +++ /dev/null @@ -1,18 +0,0 @@ -from mmengine.config import read_base - -with read_base(): - from .needlebench_multi_reasoning import needlebench_datasets_2needle_en as needlebench_multi_2needle_en_datasets - from .needlebench_multi_reasoning import needlebench_datasets_3needle_en as needlebench_multi_3needle_en_datasets - from .needlebench_multi_reasoning import needlebench_datasets_4needle_en as needlebench_multi_4needle_en_datasets - from .needlebench_multi_reasoning import needlebench_datasets_5needle_en as needlebench_multi_5needle_en_datasets - from .needlebench_multi_reasoning import needlebench_datasets_2needle_zh as needlebench_multi_2needle_zh_datasets - from .needlebench_multi_reasoning import needlebench_datasets_3needle_zh as needlebench_multi_3needle_zh_datasets - from .needlebench_multi_reasoning import needlebench_datasets_4needle_zh as needlebench_multi_4needle_zh_datasets - from .needlebench_multi_reasoning import needlebench_datasets_5needle_zh as needlebench_multi_5needle_zh_datasets - - from .needlebench_single import needlebench_datasets_en as needlebench_origin_en_datasets - from .needlebench_single import needlebench_datasets_zh as needlebench_origin_zh_datasets - from .needlebench_multi_retrieval import needlebench_datasets_en as needlebench_parallel_en_datasets - from .needlebench_multi_retrieval import needlebench_datasets_zh as needlebench_parallel_zh_datasets - -needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), []) diff --git a/configs/datasets/needlebench/needlebench_200k/needlebench_200k.py b/configs/datasets/needlebench/needlebench_200k/needlebench_200k.py new file mode 100644 index 00000000..3d698585 --- /dev/null +++ b/configs/datasets/needlebench/needlebench_200k/needlebench_200k.py @@ -0,0 +1,18 @@ +from mmengine.config import read_base + +with read_base(): + from .needlebench_multi_reasoning_200k import needlebench_2needle_en_datasets as needlebench_multi_2needle_en_datasets + from .needlebench_multi_reasoning_200k import needlebench_3needle_en_datasets as needlebench_multi_3needle_en_datasets + from .needlebench_multi_reasoning_200k import needlebench_4needle_en_datasets as needlebench_multi_4needle_en_datasets + from .needlebench_multi_reasoning_200k import needlebench_5needle_en_datasets as needlebench_multi_5needle_en_datasets + from .needlebench_multi_reasoning_200k import needlebench_2needle_zh_datasets as needlebench_multi_2needle_zh_datasets + from .needlebench_multi_reasoning_200k import needlebench_3needle_zh_datasets as needlebench_multi_3needle_zh_datasets + from .needlebench_multi_reasoning_200k import needlebench_4needle_zh_datasets as needlebench_multi_4needle_zh_datasets + from .needlebench_multi_reasoning_200k import needlebench_5needle_zh_datasets as needlebench_multi_5needle_zh_datasets + + from .needlebench_single_200k import needlebench_en_datasets as needlebench_origin_en_datasets + from .needlebench_single_200k import needlebench_zh_datasets as needlebench_origin_zh_datasets + from .needlebench_multi_retrieval_200k import needlebench_en_datasets as needlebench_parallel_en_datasets + from .needlebench_multi_retrieval_200k import needlebench_zh_datasets as needlebench_parallel_zh_datasets + +needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), []) diff --git a/configs/datasets/needlebench/needlebench_200k/needlebench_multi_reasoning.py b/configs/datasets/needlebench/needlebench_200k/needlebench_multi_reasoning_200k.py similarity index 92% rename from configs/datasets/needlebench/needlebench_200k/needlebench_multi_reasoning.py rename to configs/datasets/needlebench/needlebench_200k/needlebench_multi_reasoning_200k.py index f06845dc..16d230a3 100644 --- a/configs/datasets/needlebench/needlebench_200k/needlebench_multi_reasoning.py +++ b/configs/datasets/needlebench/needlebench_200k/needlebench_multi_reasoning_200k.py @@ -63,7 +63,7 @@ file_list = ['PaulGrahamEssays.jsonl'] needle_file_name = 'multi_needle_reasoning_en.json' diff = 10 num_needles = 2 -needlebench_datasets_2needle_en = [] +needlebench_2needle_en_datasets = [] language = 'English' for original_context_length in context_lengths: @@ -88,10 +88,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_2needle_en.append(dataset_dict) + needlebench_2needle_en_datasets.append(dataset_dict) num_needles = 3 -needlebench_datasets_3needle_en = [] +needlebench_3needle_en_datasets = [] for original_context_length in context_lengths: for depth_percent in depths_list: @@ -115,10 +115,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_3needle_en.append(dataset_dict) + needlebench_3needle_en_datasets.append(dataset_dict) num_needles = 4 -needlebench_datasets_4needle_en = [] +needlebench_4needle_en_datasets = [] for original_context_length in context_lengths: for depth_percent in depths_list: @@ -142,10 +142,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_4needle_en.append(dataset_dict) + needlebench_4needle_en_datasets.append(dataset_dict) num_needles = 5 -needlebench_datasets_5needle_en = [] +needlebench_5needle_en_datasets = [] for original_context_length in context_lengths: for depth_percent in depths_list: @@ -169,7 +169,7 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_5needle_en.append(dataset_dict) + needlebench_5needle_en_datasets.append(dataset_dict) # ----------Chinese Version---------- base_path = './data/needlebench' @@ -178,7 +178,7 @@ file_list = ['zh_finance.jsonl'] needle_file_name = 'multi_needle_reasoning_zh.json' diff = 10 num_needles = 2 -needlebench_datasets_2needle_zh = [] +needlebench_2needle_zh_datasets = [] language = 'Chinese' for original_context_length in context_lengths: @@ -203,10 +203,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_2needle_zh.append(dataset_dict) + needlebench_2needle_zh_datasets.append(dataset_dict) num_needles = 3 -needlebench_datasets_3needle_zh = [] +needlebench_3needle_zh_datasets = [] for original_context_length in context_lengths: for depth_percent in depths_list: @@ -230,10 +230,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_3needle_zh.append(dataset_dict) + needlebench_3needle_zh_datasets.append(dataset_dict) num_needles = 4 -needlebench_datasets_4needle_zh = [] +needlebench_4needle_zh_datasets = [] for original_context_length in context_lengths: for depth_percent in depths_list: @@ -257,10 +257,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_4needle_zh.append(dataset_dict) + needlebench_4needle_zh_datasets.append(dataset_dict) num_needles = 5 -needlebench_datasets_5needle_zh = [] +needlebench_5needle_zh_datasets = [] for original_context_length in context_lengths: for depth_percent in depths_list: @@ -284,4 +284,4 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_5needle_zh.append(dataset_dict) + needlebench_5needle_zh_datasets.append(dataset_dict) diff --git a/configs/datasets/needlebench/needlebench_200k/needlebench_multi_retrieval.py b/configs/datasets/needlebench/needlebench_200k/needlebench_multi_retrieval_200k.py similarity index 95% rename from configs/datasets/needlebench/needlebench_200k/needlebench_multi_retrieval.py rename to configs/datasets/needlebench/needlebench_200k/needlebench_multi_retrieval_200k.py index 185976fc..d9c601f7 100644 --- a/configs/datasets/needlebench/needlebench_200k/needlebench_multi_retrieval.py +++ b/configs/datasets/needlebench/needlebench_200k/needlebench_multi_retrieval_200k.py @@ -59,7 +59,7 @@ document_depth_percent_interval_type = "linear" base_path = './data/needlebench' file_list = ['PaulGrahamEssays.jsonl'] -needlebench_datasets_en = [] +needlebench_en_datasets = [] needle_file_name = 'needles.jsonl' depths = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100] @@ -82,10 +82,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_en.append(dataset_dict) + needlebench_en_datasets.append(dataset_dict) file_list = ['zh_finance.jsonl'] -needlebench_datasets_zh = [] +needlebench_zh_datasets = [] for original_context_length in context_lengths: dataset_dict = { @@ -106,4 +106,4 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_zh.append(dataset_dict) + needlebench_zh_datasets.append(dataset_dict) diff --git a/configs/datasets/needlebench/needlebench_200k/needlebench_single.py b/configs/datasets/needlebench/needlebench_200k/needlebench_single_200k.py similarity index 96% rename from configs/datasets/needlebench/needlebench_200k/needlebench_single.py rename to configs/datasets/needlebench/needlebench_200k/needlebench_single_200k.py index 29c791cf..53045e1a 100644 --- a/configs/datasets/needlebench/needlebench_200k/needlebench_single.py +++ b/configs/datasets/needlebench/needlebench_200k/needlebench_single_200k.py @@ -58,7 +58,7 @@ depths_list = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100] base_path = './data/needlebench' file_list = ['PaulGrahamEssays.jsonl'] -needlebench_datasets_en = [] +needlebench_en_datasets = [] needle_file_name = 'needles.jsonl' for original_context_length in context_lengths: @@ -81,10 +81,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_en.append(dataset_dict) + needlebench_en_datasets.append(dataset_dict) file_list = ['zh_finance.jsonl'] -needlebench_datasets_zh = [] +needlebench_zh_datasets = [] needle_file_name = 'needles.jsonl' for original_context_length in context_lengths: @@ -107,4 +107,4 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_zh.append(dataset_dict) + needlebench_zh_datasets.append(dataset_dict) diff --git a/configs/datasets/needlebench/needlebench_32k/needlebench.py b/configs/datasets/needlebench/needlebench_32k/needlebench.py deleted file mode 100644 index b73abb1f..00000000 --- a/configs/datasets/needlebench/needlebench_32k/needlebench.py +++ /dev/null @@ -1,18 +0,0 @@ -from mmengine.config import read_base - -with read_base(): - from .needlebench_multi_reasoning import needlebench_datasets_2needle_en as needlebench_multi_2needle_en_datasets - from .needlebench_multi_reasoning import needlebench_datasets_3needle_en as needlebench_multi_3needle_en_datasets - from .needlebench_multi_reasoning import needlebench_datasets_4needle_en as needlebench_multi_4needle_en_datasets - from .needlebench_multi_reasoning import needlebench_datasets_5needle_en as needlebench_multi_5needle_en_datasets - from .needlebench_multi_reasoning import needlebench_datasets_2needle_zh as needlebench_multi_2needle_zh_datasets - from .needlebench_multi_reasoning import needlebench_datasets_3needle_zh as needlebench_multi_3needle_zh_datasets - from .needlebench_multi_reasoning import needlebench_datasets_4needle_zh as needlebench_multi_4needle_zh_datasets - from .needlebench_multi_reasoning import needlebench_datasets_5needle_zh as needlebench_multi_5needle_zh_datasets - - from .needlebench_single import needlebench_datasets_en as needlebench_origin_en_datasets - from .needlebench_single import needlebench_datasets_zh as needlebench_origin_zh_datasets - from .needlebench_multi_retrieval import needlebench_datasets_en as needlebench_parallel_en_datasets - from .needlebench_multi_retrieval import needlebench_datasets_zh as needlebench_parallel_zh_datasets - -needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), []) diff --git a/configs/datasets/needlebench/needlebench_32k/needlebench_32k.py b/configs/datasets/needlebench/needlebench_32k/needlebench_32k.py new file mode 100644 index 00000000..e770f96c --- /dev/null +++ b/configs/datasets/needlebench/needlebench_32k/needlebench_32k.py @@ -0,0 +1,18 @@ +from mmengine.config import read_base + +with read_base(): + from .needlebench_multi_reasoning_32k import needlebench_2needle_en_datasets as needlebench_multi_2needle_en_datasets + from .needlebench_multi_reasoning_32k import needlebench_3needle_en_datasets as needlebench_multi_3needle_en_datasets + from .needlebench_multi_reasoning_32k import needlebench_4needle_en_datasets as needlebench_multi_4needle_en_datasets + from .needlebench_multi_reasoning_32k import needlebench_5needle_en_datasets as needlebench_multi_5needle_en_datasets + from .needlebench_multi_reasoning_32k import needlebench_2needle_zh_datasets as needlebench_multi_2needle_zh_datasets + from .needlebench_multi_reasoning_32k import needlebench_3needle_zh_datasets as needlebench_multi_3needle_zh_datasets + from .needlebench_multi_reasoning_32k import needlebench_4needle_zh_datasets as needlebench_multi_4needle_zh_datasets + from .needlebench_multi_reasoning_32k import needlebench_5needle_zh_datasets as needlebench_multi_5needle_zh_datasets + + from .needlebench_single_32k import needlebench_en_datasets as needlebench_origin_en_datasets + from .needlebench_single_32k import needlebench_zh_datasets as needlebench_origin_zh_datasets + from .needlebench_multi_retrieval_32k import needlebench_en_datasets as needlebench_parallel_en_datasets + from .needlebench_multi_retrieval_32k import needlebench_zh_datasets as needlebench_parallel_zh_datasets + +needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), []) diff --git a/configs/datasets/needlebench/needlebench_32k/needlebench_multi_reasoning.py b/configs/datasets/needlebench/needlebench_32k/needlebench_multi_reasoning_32k.py similarity index 92% rename from configs/datasets/needlebench/needlebench_32k/needlebench_multi_reasoning.py rename to configs/datasets/needlebench/needlebench_32k/needlebench_multi_reasoning_32k.py index fc7f4c4e..580da693 100644 --- a/configs/datasets/needlebench/needlebench_32k/needlebench_multi_reasoning.py +++ b/configs/datasets/needlebench/needlebench_32k/needlebench_multi_reasoning_32k.py @@ -64,7 +64,7 @@ file_list = ['PaulGrahamEssays.jsonl'] needle_file_name = 'multi_needle_reasoning_en.json' diff = 10 num_needles = 2 -needlebench_datasets_2needle_en = [] +needlebench_2needle_en_datasets = [] language = 'English' for original_context_length in context_lengths: @@ -89,10 +89,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_2needle_en.append(dataset_dict) + needlebench_2needle_en_datasets.append(dataset_dict) num_needles = 3 -needlebench_datasets_3needle_en = [] +needlebench_3needle_en_datasets = [] for original_context_length in context_lengths: for depth_percent in depths_list: @@ -116,10 +116,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_3needle_en.append(dataset_dict) + needlebench_3needle_en_datasets.append(dataset_dict) num_needles = 4 -needlebench_datasets_4needle_en = [] +needlebench_4needle_en_datasets = [] for original_context_length in context_lengths: for depth_percent in depths_list: @@ -143,10 +143,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_4needle_en.append(dataset_dict) + needlebench_4needle_en_datasets.append(dataset_dict) num_needles = 5 -needlebench_datasets_5needle_en = [] +needlebench_5needle_en_datasets = [] for original_context_length in context_lengths: for depth_percent in depths_list: @@ -170,7 +170,7 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_5needle_en.append(dataset_dict) + needlebench_5needle_en_datasets.append(dataset_dict) # ----------Chinese Version---------- base_path = './data/needlebench' @@ -179,7 +179,7 @@ file_list = ['zh_finance.jsonl'] needle_file_name = 'multi_needle_reasoning_zh.json' diff = 10 num_needles = 2 -needlebench_datasets_2needle_zh = [] +needlebench_2needle_zh_datasets = [] language = 'Chinese' for original_context_length in context_lengths: @@ -204,10 +204,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_2needle_zh.append(dataset_dict) + needlebench_2needle_zh_datasets.append(dataset_dict) num_needles = 3 -needlebench_datasets_3needle_zh = [] +needlebench_3needle_zh_datasets = [] for original_context_length in context_lengths: for depth_percent in depths_list: @@ -231,10 +231,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_3needle_zh.append(dataset_dict) + needlebench_3needle_zh_datasets.append(dataset_dict) num_needles = 4 -needlebench_datasets_4needle_zh = [] +needlebench_4needle_zh_datasets = [] for original_context_length in context_lengths: for depth_percent in depths_list: @@ -258,10 +258,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_4needle_zh.append(dataset_dict) + needlebench_4needle_zh_datasets.append(dataset_dict) num_needles = 5 -needlebench_datasets_5needle_zh = [] +needlebench_5needle_zh_datasets = [] for original_context_length in context_lengths: for depth_percent in depths_list: @@ -285,4 +285,4 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_5needle_zh.append(dataset_dict) + needlebench_5needle_zh_datasets.append(dataset_dict) diff --git a/configs/datasets/needlebench/needlebench_32k/needlebench_multi_retrieval.py b/configs/datasets/needlebench/needlebench_32k/needlebench_multi_retrieval_32k.py similarity index 95% rename from configs/datasets/needlebench/needlebench_32k/needlebench_multi_retrieval.py rename to configs/datasets/needlebench/needlebench_32k/needlebench_multi_retrieval_32k.py index 9f5c416a..6f69bbca 100644 --- a/configs/datasets/needlebench/needlebench_32k/needlebench_multi_retrieval.py +++ b/configs/datasets/needlebench/needlebench_32k/needlebench_multi_retrieval_32k.py @@ -58,7 +58,7 @@ document_depth_percent_interval_type = "linear" base_path = './data/needlebench' file_list = ['PaulGrahamEssays.jsonl'] -needlebench_datasets_en = [] +needlebench_en_datasets = [] needle_file_name = 'needles.jsonl' depths = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100] @@ -81,10 +81,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_en.append(dataset_dict) + needlebench_en_datasets.append(dataset_dict) file_list = ['zh_finance.jsonl'] -needlebench_datasets_zh = [] +needlebench_zh_datasets = [] for original_context_length in context_lengths: dataset_dict = { @@ -105,4 +105,4 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_zh.append(dataset_dict) + needlebench_zh_datasets.append(dataset_dict) diff --git a/configs/datasets/needlebench/needlebench_32k/needlebench_single.py b/configs/datasets/needlebench/needlebench_32k/needlebench_single_32k.py similarity index 95% rename from configs/datasets/needlebench/needlebench_32k/needlebench_single.py rename to configs/datasets/needlebench/needlebench_32k/needlebench_single_32k.py index 5f837595..d8cfbaa5 100644 --- a/configs/datasets/needlebench/needlebench_32k/needlebench_single.py +++ b/configs/datasets/needlebench/needlebench_32k/needlebench_single_32k.py @@ -59,7 +59,7 @@ document_depth_percent_interval_type = "linear" base_path = './data/needlebench' file_list = ['PaulGrahamEssays.jsonl'] -needlebench_datasets_en = [] +needlebench_en_datasets = [] needle_file_name = 'needles.jsonl' for original_context_length in context_lengths: @@ -82,10 +82,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_en.append(dataset_dict) + needlebench_en_datasets.append(dataset_dict) file_list = ['zh_finance.jsonl'] -needlebench_datasets_zh = [] +needlebench_zh_datasets = [] needle_file_name = 'needles.jsonl' for original_context_length in context_lengths: @@ -108,4 +108,4 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_zh.append(dataset_dict) + needlebench_zh_datasets.append(dataset_dict) diff --git a/configs/datasets/needlebench/needlebench_4k/needlebench.py b/configs/datasets/needlebench/needlebench_4k/needlebench.py deleted file mode 100644 index b73abb1f..00000000 --- a/configs/datasets/needlebench/needlebench_4k/needlebench.py +++ /dev/null @@ -1,18 +0,0 @@ -from mmengine.config import read_base - -with read_base(): - from .needlebench_multi_reasoning import needlebench_datasets_2needle_en as needlebench_multi_2needle_en_datasets - from .needlebench_multi_reasoning import needlebench_datasets_3needle_en as needlebench_multi_3needle_en_datasets - from .needlebench_multi_reasoning import needlebench_datasets_4needle_en as needlebench_multi_4needle_en_datasets - from .needlebench_multi_reasoning import needlebench_datasets_5needle_en as needlebench_multi_5needle_en_datasets - from .needlebench_multi_reasoning import needlebench_datasets_2needle_zh as needlebench_multi_2needle_zh_datasets - from .needlebench_multi_reasoning import needlebench_datasets_3needle_zh as needlebench_multi_3needle_zh_datasets - from .needlebench_multi_reasoning import needlebench_datasets_4needle_zh as needlebench_multi_4needle_zh_datasets - from .needlebench_multi_reasoning import needlebench_datasets_5needle_zh as needlebench_multi_5needle_zh_datasets - - from .needlebench_single import needlebench_datasets_en as needlebench_origin_en_datasets - from .needlebench_single import needlebench_datasets_zh as needlebench_origin_zh_datasets - from .needlebench_multi_retrieval import needlebench_datasets_en as needlebench_parallel_en_datasets - from .needlebench_multi_retrieval import needlebench_datasets_zh as needlebench_parallel_zh_datasets - -needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), []) diff --git a/configs/datasets/needlebench/needlebench_4k/needlebench_4k.py b/configs/datasets/needlebench/needlebench_4k/needlebench_4k.py new file mode 100644 index 00000000..f23aa86b --- /dev/null +++ b/configs/datasets/needlebench/needlebench_4k/needlebench_4k.py @@ -0,0 +1,18 @@ +from mmengine.config import read_base + +with read_base(): + from .needlebench_multi_reasoning_4k import needlebench_2needle_en_datasets as needlebench_multi_2needle_en_datasets + from .needlebench_multi_reasoning_4k import needlebench_3needle_en_datasets as needlebench_multi_3needle_en_datasets + from .needlebench_multi_reasoning_4k import needlebench_4needle_en_datasets as needlebench_multi_4needle_en_datasets + from .needlebench_multi_reasoning_4k import needlebench_5needle_en_datasets as needlebench_multi_5needle_en_datasets + from .needlebench_multi_reasoning_4k import needlebench_2needle_zh_datasets as needlebench_multi_2needle_zh_datasets + from .needlebench_multi_reasoning_4k import needlebench_3needle_zh_datasets as needlebench_multi_3needle_zh_datasets + from .needlebench_multi_reasoning_4k import needlebench_4needle_zh_datasets as needlebench_multi_4needle_zh_datasets + from .needlebench_multi_reasoning_4k import needlebench_5needle_zh_datasets as needlebench_multi_5needle_zh_datasets + + from .needlebench_single_4k import needlebench_en_datasets as needlebench_origin_en_datasets + from .needlebench_single_4k import needlebench_zh_datasets as needlebench_origin_zh_datasets + from .needlebench_multi_retrieval_4k import needlebench_en_datasets as needlebench_parallel_en_datasets + from .needlebench_multi_retrieval_4k import needlebench_zh_datasets as needlebench_parallel_zh_datasets + +needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), []) diff --git a/configs/datasets/needlebench/needlebench_4k/needlebench_multi_reasoning.py b/configs/datasets/needlebench/needlebench_4k/needlebench_multi_reasoning_4k.py similarity index 93% rename from configs/datasets/needlebench/needlebench_4k/needlebench_multi_reasoning.py rename to configs/datasets/needlebench/needlebench_4k/needlebench_multi_reasoning_4k.py index 01771eb3..8c6f2456 100644 --- a/configs/datasets/needlebench/needlebench_4k/needlebench_multi_reasoning.py +++ b/configs/datasets/needlebench/needlebench_4k/needlebench_multi_reasoning_4k.py @@ -63,7 +63,7 @@ file_list = ['PaulGrahamEssays.jsonl'] needle_file_name = 'multi_needle_reasoning_en.json' diff = 10 num_needles = 2 -needlebench_datasets_2needle_en = [] +needlebench_2needle_en_datasets = [] language = 'English' for original_context_length in context_lengths: @@ -90,10 +90,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_2needle_en.append(dataset_dict) + needlebench_2needle_en_datasets.append(dataset_dict) num_needles = 3 -needlebench_datasets_3needle_en = [] +needlebench_3needle_en_datasets = [] for original_context_length in context_lengths: for depth_percent in generate_depth_percents( @@ -119,10 +119,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_3needle_en.append(dataset_dict) + needlebench_3needle_en_datasets.append(dataset_dict) num_needles = 4 -needlebench_datasets_4needle_en = [] +needlebench_4needle_en_datasets = [] for original_context_length in context_lengths: for depth_percent in generate_depth_percents( @@ -148,10 +148,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_4needle_en.append(dataset_dict) + needlebench_4needle_en_datasets.append(dataset_dict) num_needles = 5 -needlebench_datasets_5needle_en = [] +needlebench_5needle_en_datasets = [] for original_context_length in context_lengths: for depth_percent in generate_depth_percents( @@ -177,7 +177,7 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_5needle_en.append(dataset_dict) + needlebench_5needle_en_datasets.append(dataset_dict) # ----------Chinese Version---------- base_path = './data/needlebench' @@ -186,7 +186,7 @@ file_list = ['zh_finance.jsonl'] needle_file_name = 'multi_needle_reasoning_zh.json' diff = 10 num_needles = 2 -needlebench_datasets_2needle_zh = [] +needlebench_2needle_zh_datasets = [] language = 'Chinese' for original_context_length in context_lengths: @@ -213,10 +213,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_2needle_zh.append(dataset_dict) + needlebench_2needle_zh_datasets.append(dataset_dict) num_needles = 3 -needlebench_datasets_3needle_zh = [] +needlebench_3needle_zh_datasets = [] for original_context_length in context_lengths: for depth_percent in generate_depth_percents( @@ -242,10 +242,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_3needle_zh.append(dataset_dict) + needlebench_3needle_zh_datasets.append(dataset_dict) num_needles = 4 -needlebench_datasets_4needle_zh = [] +needlebench_4needle_zh_datasets = [] for original_context_length in context_lengths: for depth_percent in generate_depth_percents( @@ -271,10 +271,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_4needle_zh.append(dataset_dict) + needlebench_4needle_zh_datasets.append(dataset_dict) num_needles = 5 -needlebench_datasets_5needle_zh = [] +needlebench_5needle_zh_datasets = [] for original_context_length in context_lengths: for depth_percent in generate_depth_percents( @@ -300,4 +300,4 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_5needle_zh.append(dataset_dict) + needlebench_5needle_zh_datasets.append(dataset_dict) diff --git a/configs/datasets/needlebench/needlebench_4k/needlebench_multi_retrieval.py b/configs/datasets/needlebench/needlebench_4k/needlebench_multi_retrieval_4k.py similarity index 95% rename from configs/datasets/needlebench/needlebench_4k/needlebench_multi_retrieval.py rename to configs/datasets/needlebench/needlebench_4k/needlebench_multi_retrieval_4k.py index e18736d3..8cc9d6f2 100644 --- a/configs/datasets/needlebench/needlebench_4k/needlebench_multi_retrieval.py +++ b/configs/datasets/needlebench/needlebench_4k/needlebench_multi_retrieval_4k.py @@ -58,7 +58,7 @@ document_depth_percent_interval_type = "linear" base_path = './data/needlebench' file_list = ['PaulGrahamEssays.jsonl'] -needlebench_datasets_en = [] +needlebench_en_datasets = [] needle_file_name = 'needles.jsonl' depths_float = generate_depth_percents( document_depth_percent_intervals, @@ -84,10 +84,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_en.append(dataset_dict) + needlebench_en_datasets.append(dataset_dict) file_list = ['zh_finance.jsonl'] -needlebench_datasets_zh = [] +needlebench_zh_datasets = [] for original_context_length in context_lengths: dataset_dict = { @@ -108,4 +108,4 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_zh.append(dataset_dict) + needlebench_zh_datasets.append(dataset_dict) diff --git a/configs/datasets/needlebench/needlebench_4k/needlebench_single.py b/configs/datasets/needlebench/needlebench_4k/needlebench_single_4k.py similarity index 96% rename from configs/datasets/needlebench/needlebench_4k/needlebench_single.py rename to configs/datasets/needlebench/needlebench_4k/needlebench_single_4k.py index 895b6556..9277dfef 100644 --- a/configs/datasets/needlebench/needlebench_4k/needlebench_single.py +++ b/configs/datasets/needlebench/needlebench_4k/needlebench_single_4k.py @@ -58,7 +58,7 @@ document_depth_percent_interval_type = "linear" base_path = './data/needlebench' file_list = ['PaulGrahamEssays.jsonl'] -needlebench_datasets_en = [] +needlebench_en_datasets = [] needle_file_name = 'needles.jsonl' for original_context_length in context_lengths: @@ -83,10 +83,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_en.append(dataset_dict) + needlebench_en_datasets.append(dataset_dict) file_list = ['zh_finance.jsonl'] -needlebench_datasets_zh = [] +needlebench_zh_datasets = [] needle_file_name = 'needles.jsonl' for original_context_length in context_lengths: @@ -111,4 +111,4 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_zh.append(dataset_dict) + needlebench_zh_datasets.append(dataset_dict) diff --git a/configs/datasets/needlebench/needlebench_8k/needlebench.py b/configs/datasets/needlebench/needlebench_8k/needlebench.py deleted file mode 100644 index b73abb1f..00000000 --- a/configs/datasets/needlebench/needlebench_8k/needlebench.py +++ /dev/null @@ -1,18 +0,0 @@ -from mmengine.config import read_base - -with read_base(): - from .needlebench_multi_reasoning import needlebench_datasets_2needle_en as needlebench_multi_2needle_en_datasets - from .needlebench_multi_reasoning import needlebench_datasets_3needle_en as needlebench_multi_3needle_en_datasets - from .needlebench_multi_reasoning import needlebench_datasets_4needle_en as needlebench_multi_4needle_en_datasets - from .needlebench_multi_reasoning import needlebench_datasets_5needle_en as needlebench_multi_5needle_en_datasets - from .needlebench_multi_reasoning import needlebench_datasets_2needle_zh as needlebench_multi_2needle_zh_datasets - from .needlebench_multi_reasoning import needlebench_datasets_3needle_zh as needlebench_multi_3needle_zh_datasets - from .needlebench_multi_reasoning import needlebench_datasets_4needle_zh as needlebench_multi_4needle_zh_datasets - from .needlebench_multi_reasoning import needlebench_datasets_5needle_zh as needlebench_multi_5needle_zh_datasets - - from .needlebench_single import needlebench_datasets_en as needlebench_origin_en_datasets - from .needlebench_single import needlebench_datasets_zh as needlebench_origin_zh_datasets - from .needlebench_multi_retrieval import needlebench_datasets_en as needlebench_parallel_en_datasets - from .needlebench_multi_retrieval import needlebench_datasets_zh as needlebench_parallel_zh_datasets - -needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), []) diff --git a/configs/datasets/needlebench/needlebench_8k/needlebench_8k.py b/configs/datasets/needlebench/needlebench_8k/needlebench_8k.py new file mode 100644 index 00000000..50eb0899 --- /dev/null +++ b/configs/datasets/needlebench/needlebench_8k/needlebench_8k.py @@ -0,0 +1,18 @@ +from mmengine.config import read_base + +with read_base(): + from .needlebench_multi_reasoning_8k import needlebench_2needle_en_datasets as needlebench_multi_2needle_en_datasets + from .needlebench_multi_reasoning_8k import needlebench_3needle_en_datasets as needlebench_multi_3needle_en_datasets + from .needlebench_multi_reasoning_8k import needlebench_4needle_en_datasets as needlebench_multi_4needle_en_datasets + from .needlebench_multi_reasoning_8k import needlebench_5needle_en_datasets as needlebench_multi_5needle_en_datasets + from .needlebench_multi_reasoning_8k import needlebench_2needle_zh_datasets as needlebench_multi_2needle_zh_datasets + from .needlebench_multi_reasoning_8k import needlebench_3needle_zh_datasets as needlebench_multi_3needle_zh_datasets + from .needlebench_multi_reasoning_8k import needlebench_4needle_zh_datasets as needlebench_multi_4needle_zh_datasets + from .needlebench_multi_reasoning_8k import needlebench_5needle_zh_datasets as needlebench_multi_5needle_zh_datasets + + from .needlebench_single_8k import needlebench_en_datasets as needlebench_origin_en_datasets + from .needlebench_single_8k import needlebench_zh_datasets as needlebench_origin_zh_datasets + from .needlebench_multi_retrieval_8k import needlebench_en_datasets as needlebench_parallel_en_datasets + from .needlebench_multi_retrieval_8k import needlebench_zh_datasets as needlebench_parallel_zh_datasets + +needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), []) diff --git a/configs/datasets/needlebench/needlebench_8k/needlebench_multi_reasoning.py b/configs/datasets/needlebench/needlebench_8k/needlebench_multi_reasoning_8k.py similarity index 93% rename from configs/datasets/needlebench/needlebench_8k/needlebench_multi_reasoning.py rename to configs/datasets/needlebench/needlebench_8k/needlebench_multi_reasoning_8k.py index 98aa0556..4bf2c106 100644 --- a/configs/datasets/needlebench/needlebench_8k/needlebench_multi_reasoning.py +++ b/configs/datasets/needlebench/needlebench_8k/needlebench_multi_reasoning_8k.py @@ -63,7 +63,7 @@ file_list = ['PaulGrahamEssays.jsonl'] needle_file_name = 'multi_needle_reasoning_en.json' diff = 10 num_needles = 2 -needlebench_datasets_2needle_en = [] +needlebench_2needle_en_datasets = [] language = 'English' for original_context_length in context_lengths: @@ -90,10 +90,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_2needle_en.append(dataset_dict) + needlebench_2needle_en_datasets.append(dataset_dict) num_needles = 3 -needlebench_datasets_3needle_en = [] +needlebench_3needle_en_datasets = [] for original_context_length in context_lengths: for depth_percent in generate_depth_percents( @@ -119,10 +119,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_3needle_en.append(dataset_dict) + needlebench_3needle_en_datasets.append(dataset_dict) num_needles = 4 -needlebench_datasets_4needle_en = [] +needlebench_4needle_en_datasets = [] for original_context_length in context_lengths: for depth_percent in generate_depth_percents( @@ -148,10 +148,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_4needle_en.append(dataset_dict) + needlebench_4needle_en_datasets.append(dataset_dict) num_needles = 5 -needlebench_datasets_5needle_en = [] +needlebench_5needle_en_datasets = [] for original_context_length in context_lengths: for depth_percent in generate_depth_percents( @@ -177,7 +177,7 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_5needle_en.append(dataset_dict) + needlebench_5needle_en_datasets.append(dataset_dict) # ----------Chinese Version---------- base_path = './data/needlebench' @@ -186,7 +186,7 @@ file_list = ['zh_finance.jsonl'] needle_file_name = 'multi_needle_reasoning_zh.json' diff = 10 num_needles = 2 -needlebench_datasets_2needle_zh = [] +needlebench_2needle_zh_datasets = [] language = 'Chinese' for original_context_length in context_lengths: @@ -213,10 +213,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_2needle_zh.append(dataset_dict) + needlebench_2needle_zh_datasets.append(dataset_dict) num_needles = 3 -needlebench_datasets_3needle_zh = [] +needlebench_3needle_zh_datasets = [] for original_context_length in context_lengths: for depth_percent in generate_depth_percents( @@ -242,10 +242,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_3needle_zh.append(dataset_dict) + needlebench_3needle_zh_datasets.append(dataset_dict) num_needles = 4 -needlebench_datasets_4needle_zh = [] +needlebench_4needle_zh_datasets = [] for original_context_length in context_lengths: for depth_percent in generate_depth_percents( @@ -271,10 +271,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_4needle_zh.append(dataset_dict) + needlebench_4needle_zh_datasets.append(dataset_dict) num_needles = 5 -needlebench_datasets_5needle_zh = [] +needlebench_5needle_zh_datasets = [] for original_context_length in context_lengths: for depth_percent in generate_depth_percents( @@ -300,4 +300,4 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_5needle_zh.append(dataset_dict) + needlebench_5needle_zh_datasets.append(dataset_dict) diff --git a/configs/datasets/needlebench/needlebench_8k/needlebench_multi_retrieval.py b/configs/datasets/needlebench/needlebench_8k/needlebench_multi_retrieval_8k.py similarity index 95% rename from configs/datasets/needlebench/needlebench_8k/needlebench_multi_retrieval.py rename to configs/datasets/needlebench/needlebench_8k/needlebench_multi_retrieval_8k.py index 2890c6cc..05c862f0 100644 --- a/configs/datasets/needlebench/needlebench_8k/needlebench_multi_retrieval.py +++ b/configs/datasets/needlebench/needlebench_8k/needlebench_multi_retrieval_8k.py @@ -58,7 +58,7 @@ document_depth_percent_interval_type = "linear" base_path = './data/needlebench' file_list = ['PaulGrahamEssays.jsonl'] -needlebench_datasets_en = [] +needlebench_en_datasets = [] needle_file_name = 'needles.jsonl' depths_float = generate_depth_percents( document_depth_percent_intervals, @@ -84,10 +84,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_en.append(dataset_dict) + needlebench_en_datasets.append(dataset_dict) file_list = ['zh_finance.jsonl'] -needlebench_datasets_zh = [] +needlebench_zh_datasets = [] for original_context_length in context_lengths: dataset_dict = { @@ -108,4 +108,4 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_zh.append(dataset_dict) + needlebench_zh_datasets.append(dataset_dict) diff --git a/configs/datasets/needlebench/needlebench_8k/needlebench_multi_retrieval_compare_batch.py b/configs/datasets/needlebench/needlebench_8k/needlebench_multi_retrieval_compare_batch_8k.py similarity index 96% rename from configs/datasets/needlebench/needlebench_8k/needlebench_multi_retrieval_compare_batch.py rename to configs/datasets/needlebench/needlebench_8k/needlebench_multi_retrieval_compare_batch_8k.py index e088a969..abc2f9b8 100644 --- a/configs/datasets/needlebench/needlebench_8k/needlebench_multi_retrieval_compare_batch.py +++ b/configs/datasets/needlebench/needlebench_8k/needlebench_multi_retrieval_compare_batch_8k.py @@ -58,7 +58,7 @@ document_depth_percent_interval_type = "linear" base_path = './data/needlebench' file_list = ['PaulGrahamEssays.jsonl'] -needlebench_datasets_en = [] +needlebench_en_datasets = [] needle_file_name = 'needles.jsonl' for document_depth_percent_intervals in document_depth_percent_intervals_list: @@ -86,10 +86,10 @@ for document_depth_percent_intervals in document_depth_percent_intervals_list: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_en.append(dataset_dict) + needlebench_en_datasets.append(dataset_dict) file_list = ['zh_finance.jsonl'] -needlebench_datasets_zh = [] +needlebench_zh_datasets = [] needle_file_name = 'needles.jsonl' for document_depth_percent_intervals in document_depth_percent_intervals_list: @@ -117,4 +117,4 @@ for document_depth_percent_intervals in document_depth_percent_intervals_list: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_zh.append(dataset_dict) + needlebench_zh_datasets.append(dataset_dict) diff --git a/configs/datasets/needlebench/needlebench_8k/needlebench_single.py b/configs/datasets/needlebench/needlebench_8k/needlebench_single_8k.py similarity index 96% rename from configs/datasets/needlebench/needlebench_8k/needlebench_single.py rename to configs/datasets/needlebench/needlebench_8k/needlebench_single_8k.py index 3ee55bb8..91ed9b79 100644 --- a/configs/datasets/needlebench/needlebench_8k/needlebench_single.py +++ b/configs/datasets/needlebench/needlebench_8k/needlebench_single_8k.py @@ -58,7 +58,7 @@ document_depth_percent_interval_type = "linear" base_path = './data/needlebench' file_list = ['PaulGrahamEssays.jsonl'] -needlebench_datasets_en = [] +needlebench_en_datasets = [] needle_file_name = 'needles.jsonl' for original_context_length in context_lengths: @@ -83,10 +83,10 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_en.append(dataset_dict) + needlebench_en_datasets.append(dataset_dict) file_list = ['zh_finance.jsonl'] -needlebench_datasets_zh = [] +needlebench_zh_datasets = [] needle_file_name = 'needles.jsonl' for original_context_length in context_lengths: @@ -111,4 +111,4 @@ for original_context_length in context_lengths: 'infer_cfg': needlebench_infer_cfg, 'eval_cfg': needlebench_eval_cfg } - needlebench_datasets_zh.append(dataset_dict) + needlebench_zh_datasets.append(dataset_dict) diff --git a/opencompass/utils/run.py b/opencompass/utils/run.py index e1babda8..0e068b2a 100644 --- a/opencompass/utils/run.py +++ b/opencompass/utils/run.py @@ -70,12 +70,20 @@ def get_config_from_arg(args) -> Config: datasets = [] if args.datasets: datasets_dir = os.path.join(args.config_dir, 'datasets') - for dataset in match_cfg_file(datasets_dir, args.datasets): - get_logger().info(f'Loading {dataset[0]}: {dataset[1]}') - cfg = Config.fromfile(dataset[1]) - for k in cfg.keys(): - if k.endswith('_datasets'): - datasets += cfg[k] + for dataset_arg in args.datasets: + if '/' in dataset_arg: + dataset_name, dataset_suffix = dataset_arg.split('/', 1) + dataset_key_suffix = dataset_suffix + else: + dataset_name = dataset_arg + dataset_key_suffix = '_datasets' + + for dataset in match_cfg_file(datasets_dir, [dataset_name]): + get_logger().info(f'Loading {dataset[0]}: {dataset[1]}') + cfg = Config.fromfile(dataset[1]) + for k in cfg.keys(): + if k.endswith(dataset_key_suffix): + datasets += cfg[k] else: dataset = {'path': args.custom_dataset_path} if args.custom_dataset_infer_method is not None: @@ -119,12 +127,26 @@ def get_config_from_arg(args) -> Config: run_cfg=dict(num_gpus=args.num_gpus)) models.append(model) # parse summarizer args - summarizer = args.summarizer if args.summarizer is not None else 'example' + summarizer_arg = args.summarizer if args.summarizer is not None \ + else 'example' summarizers_dir = os.path.join(args.config_dir, 'summarizers') - s = match_cfg_file(summarizers_dir, [summarizer])[0] + + # Check if summarizer_arg contains '/' + if '/' in summarizer_arg: + # If it contains '/', split the string by '/' + # and use the second part as the configuration key + summarizer_file, summarizer_key = summarizer_arg.split('/', 1) + else: + # If it does not contain '/', keep the original logic unchanged + summarizer_key = 'summarizer' + summarizer_file = summarizer_arg + + s = match_cfg_file(summarizers_dir, [summarizer_file])[0] get_logger().info(f'Loading {s[0]}: {s[1]}') cfg = Config.fromfile(s[1]) - summarizer = cfg['summarizer'] + # Use summarizer_key to retrieve the summarizer definition + # from the configuration file + summarizer = cfg[summarizer_key] return Config(dict(models=models, datasets=datasets, summarizer=summarizer),