diff --git a/configs/datasets/needlebench/needlebench_128k/needlebench_multi_reasoning.py b/configs/datasets/needlebench/needlebench_128k/needlebench_multi_reasoning.py index e15fd552..00f399c0 100644 --- a/configs/datasets/needlebench/needlebench_128k/needlebench_multi_reasoning.py +++ b/configs/datasets/needlebench/needlebench_128k/needlebench_multi_reasoning.py @@ -53,6 +53,7 @@ needlebench_eval_cfg = dict( pred_role='BOT') context_lengths = list([16000, 32000, 48000, 64000, 80000, 96000, 112000, 128000]) +depths_list = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100] document_depth_percent_intervals = 20 document_depth_percent_interval_type = "linear" @@ -67,9 +68,7 @@ needlebench_datasets_2needle_en = [] language = 'English' for original_context_length in context_lengths: - for depth_percent in generate_depth_percents( - document_depth_percent_intervals, - document_depth_percent_interval_type): + for depth_percent in depths_list: dataset_dict = { 'abbr': f'Length{original_context_length}' f'Depth{int(depth_percent)}_{num_needles}needle_en_128k', @@ -96,9 +95,7 @@ num_needles = 3 needlebench_datasets_3needle_en = [] for original_context_length in context_lengths: - for depth_percent in generate_depth_percents( - document_depth_percent_intervals, - document_depth_percent_interval_type): + for depth_percent in depths_list: dataset_dict = { 'abbr': f'Length{original_context_length}' f'Depth{int(depth_percent)}_{num_needles}needle_en_128k', @@ -125,9 +122,7 @@ num_needles = 4 needlebench_datasets_4needle_en = [] for original_context_length in context_lengths: - for depth_percent in generate_depth_percents( - document_depth_percent_intervals, - document_depth_percent_interval_type): + for depth_percent in depths_list: dataset_dict = { 'abbr': f'Length{original_context_length}' f'Depth{int(depth_percent)}_{num_needles}needle_en_128k', @@ -154,9 +149,7 @@ num_needles = 5 needlebench_datasets_5needle_en = [] for original_context_length in context_lengths: - for depth_percent in generate_depth_percents( - document_depth_percent_intervals, - document_depth_percent_interval_type): + for depth_percent in depths_list: dataset_dict = { 'abbr': f'Length{original_context_length}' f'Depth{int(depth_percent)}_{num_needles}needle_en_128k', @@ -190,9 +183,7 @@ needlebench_datasets_2needle_zh = [] language = 'Chinese' for original_context_length in context_lengths: - for depth_percent in generate_depth_percents( - document_depth_percent_intervals, - document_depth_percent_interval_type): + for depth_percent in depths_list: dataset_dict = { 'abbr': f'Length{original_context_length}' f'Depth{int(depth_percent)}_{num_needles}needle_zh_128k', @@ -219,9 +210,7 @@ num_needles = 3 needlebench_datasets_3needle_zh = [] for original_context_length in context_lengths: - for depth_percent in generate_depth_percents( - document_depth_percent_intervals, - document_depth_percent_interval_type): + for depth_percent in depths_list: dataset_dict = { 'abbr': f'Length{original_context_length}' f'Depth{int(depth_percent)}_{num_needles}needle_zh_128k', @@ -248,9 +237,7 @@ num_needles = 4 needlebench_datasets_4needle_zh = [] for original_context_length in context_lengths: - for depth_percent in generate_depth_percents( - document_depth_percent_intervals, - document_depth_percent_interval_type): + for depth_percent in depths_list: dataset_dict = { 'abbr': f'Length{original_context_length}' f'Depth{int(depth_percent)}_{num_needles}needle_zh_128k', @@ -277,9 +264,7 @@ num_needles = 5 needlebench_datasets_5needle_zh = [] for original_context_length in context_lengths: - for depth_percent in generate_depth_percents( - document_depth_percent_intervals, - document_depth_percent_interval_type): + for depth_percent in depths_list: dataset_dict = { 'abbr': f'Length{original_context_length}' f'Depth{int(depth_percent)}_{num_needles}needle_zh_128k', diff --git a/configs/datasets/needlebench/needlebench_128k/needlebench_multi_retrieval.py b/configs/datasets/needlebench/needlebench_128k/needlebench_multi_retrieval.py index 5d561f4d..179e7c9a 100644 --- a/configs/datasets/needlebench/needlebench_128k/needlebench_multi_retrieval.py +++ b/configs/datasets/needlebench/needlebench_128k/needlebench_multi_retrieval.py @@ -60,10 +60,7 @@ base_path = './data/needlebench' file_list = ['PaulGrahamEssays.jsonl'] needlebench_datasets_en = [] needle_file_name = 'needles.jsonl' -depths_float = generate_depth_percents( - document_depth_percent_intervals, - document_depth_percent_interval_type) -depths = [int(depth) for depth in depths_float] +depths = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100] for original_context_length in context_lengths: dataset_dict = { diff --git a/configs/datasets/needlebench/needlebench_128k/needlebench_single.py b/configs/datasets/needlebench/needlebench_128k/needlebench_single.py index 62a5f38b..451a0084 100644 --- a/configs/datasets/needlebench/needlebench_128k/needlebench_single.py +++ b/configs/datasets/needlebench/needlebench_128k/needlebench_single.py @@ -53,6 +53,7 @@ needlebench_eval_cfg = dict( pred_role='BOT') context_lengths = list([16000, 32000, 48000, 64000, 80000, 96000, 112000, 128000]) +depths_list = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100] document_depth_percent_intervals = 20 document_depth_percent_interval_type = "linear" @@ -62,9 +63,7 @@ needlebench_datasets_en = [] needle_file_name = 'needles.jsonl' for original_context_length in context_lengths: - for depth_percent in generate_depth_percents( - document_depth_percent_intervals, - document_depth_percent_interval_type): + for depth_percent in depths_list: dataset_dict = { 'abbr': f'Length{original_context_length}' f'Depth{int(depth_percent)}_origin_en_128k', @@ -90,9 +89,7 @@ needlebench_datasets_zh = [] needle_file_name = 'needles.jsonl' for original_context_length in context_lengths: - for depth_percent in generate_depth_percents( - document_depth_percent_intervals, - document_depth_percent_interval_type): + for depth_percent in depths_list: dataset_dict = { 'abbr': f'Length{original_context_length}' f'Depth{int(depth_percent)}_origin_zh_128k', diff --git a/configs/datasets/needlebench/needlebench_200k/needlebench_multi_reasoning.py b/configs/datasets/needlebench/needlebench_200k/needlebench_multi_reasoning.py index 1478f5b4..f06845dc 100644 --- a/configs/datasets/needlebench/needlebench_200k/needlebench_multi_reasoning.py +++ b/configs/datasets/needlebench/needlebench_200k/needlebench_multi_reasoning.py @@ -52,9 +52,9 @@ needlebench_eval_cfg = dict( dataset_postprocessor=dict(type=needlebench_dataset_postprocess), pred_role='BOT') -context_lengths = list([16000, 32000, 48000, 64000, 80000, 96000, 112000, 128000, 144000, 160000, 176000, 192000, 200000]) -document_depth_percent_intervals = 20 -document_depth_percent_interval_type = "linear" +# context_lengths = list([16000, 32000, 48000, 64000, 80000, 96000, 112000, 128000, 144000, 160000, 176000, 192000, 200000]) +context_lengths = [16000, 48000, 80000, 112000, 128000, 144000, 176000, 200000] +depths_list = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100] # ----------English Version---------- base_path = './data/needlebench' @@ -67,9 +67,7 @@ needlebench_datasets_2needle_en = [] language = 'English' for original_context_length in context_lengths: - for depth_percent in generate_depth_percents( - document_depth_percent_intervals, - document_depth_percent_interval_type): + for depth_percent in depths_list: dataset_dict = { 'abbr': f'Length{original_context_length}' f'Depth{int(depth_percent)}_{num_needles}needle_en_200k', @@ -96,9 +94,7 @@ num_needles = 3 needlebench_datasets_3needle_en = [] for original_context_length in context_lengths: - for depth_percent in generate_depth_percents( - document_depth_percent_intervals, - document_depth_percent_interval_type): + for depth_percent in depths_list: dataset_dict = { 'abbr': f'Length{original_context_length}' f'Depth{int(depth_percent)}_{num_needles}needle_en_200k', @@ -125,9 +121,7 @@ num_needles = 4 needlebench_datasets_4needle_en = [] for original_context_length in context_lengths: - for depth_percent in generate_depth_percents( - document_depth_percent_intervals, - document_depth_percent_interval_type): + for depth_percent in depths_list: dataset_dict = { 'abbr': f'Length{original_context_length}' f'Depth{int(depth_percent)}_{num_needles}needle_en_200k', @@ -154,9 +148,7 @@ num_needles = 5 needlebench_datasets_5needle_en = [] for original_context_length in context_lengths: - for depth_percent in generate_depth_percents( - document_depth_percent_intervals, - document_depth_percent_interval_type): + for depth_percent in depths_list: dataset_dict = { 'abbr': f'Length{original_context_length}' f'Depth{int(depth_percent)}_{num_needles}needle_en_200k', @@ -190,9 +182,7 @@ needlebench_datasets_2needle_zh = [] language = 'Chinese' for original_context_length in context_lengths: - for depth_percent in generate_depth_percents( - document_depth_percent_intervals, - document_depth_percent_interval_type): + for depth_percent in depths_list: dataset_dict = { 'abbr': f'Length{original_context_length}' f'Depth{int(depth_percent)}_{num_needles}needle_zh_200k', @@ -219,9 +209,7 @@ num_needles = 3 needlebench_datasets_3needle_zh = [] for original_context_length in context_lengths: - for depth_percent in generate_depth_percents( - document_depth_percent_intervals, - document_depth_percent_interval_type): + for depth_percent in depths_list: dataset_dict = { 'abbr': f'Length{original_context_length}' f'Depth{int(depth_percent)}_{num_needles}needle_zh_200k', @@ -248,9 +236,7 @@ num_needles = 4 needlebench_datasets_4needle_zh = [] for original_context_length in context_lengths: - for depth_percent in generate_depth_percents( - document_depth_percent_intervals, - document_depth_percent_interval_type): + for depth_percent in depths_list: dataset_dict = { 'abbr': f'Length{original_context_length}' f'Depth{int(depth_percent)}_{num_needles}needle_zh_200k', @@ -277,9 +263,7 @@ num_needles = 5 needlebench_datasets_5needle_zh = [] for original_context_length in context_lengths: - for depth_percent in generate_depth_percents( - document_depth_percent_intervals, - document_depth_percent_interval_type): + for depth_percent in depths_list: dataset_dict = { 'abbr': f'Length{original_context_length}' f'Depth{int(depth_percent)}_{num_needles}needle_zh_200k', diff --git a/configs/datasets/needlebench/needlebench_200k/needlebench_multi_retrieval.py b/configs/datasets/needlebench/needlebench_200k/needlebench_multi_retrieval.py index d01145e8..185976fc 100644 --- a/configs/datasets/needlebench/needlebench_200k/needlebench_multi_retrieval.py +++ b/configs/datasets/needlebench/needlebench_200k/needlebench_multi_retrieval.py @@ -52,7 +52,8 @@ needlebench_eval_cfg = dict( dataset_postprocessor=dict(type=needlebench_dataset_postprocess), pred_role='BOT') -context_lengths = list([16000, 32000, 48000, 64000, 80000, 96000, 112000, 128000, 144000, 160000, 176000, 192000, 200000]) +# context_lengths = list([16000, 32000, 48000, 64000, 80000, 96000, 112000, 128000, 144000, 160000, 176000, 192000, 200000]) +context_lengths = list([16000, 48000, 80000, 112000, 128000, 144000, 176000, 200000]) document_depth_percent_intervals = 20 document_depth_percent_interval_type = "linear" @@ -60,10 +61,7 @@ base_path = './data/needlebench' file_list = ['PaulGrahamEssays.jsonl'] needlebench_datasets_en = [] needle_file_name = 'needles.jsonl' -depths_float = generate_depth_percents( - document_depth_percent_intervals, - document_depth_percent_interval_type) -depths = [int(depth) for depth in depths_float] +depths = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100] for original_context_length in context_lengths: dataset_dict = { diff --git a/configs/datasets/needlebench/needlebench_200k/needlebench_single.py b/configs/datasets/needlebench/needlebench_200k/needlebench_single.py index 9e7fa129..29c791cf 100644 --- a/configs/datasets/needlebench/needlebench_200k/needlebench_single.py +++ b/configs/datasets/needlebench/needlebench_200k/needlebench_single.py @@ -52,9 +52,9 @@ needlebench_eval_cfg = dict( dataset_postprocessor=dict(type=needlebench_dataset_postprocess), pred_role='BOT') -context_lengths = list([16000, 32000, 48000, 64000, 80000, 96000, 112000, 128000, 144000, 160000, 176000, 192000, 200000]) -document_depth_percent_intervals = 20 -document_depth_percent_interval_type = "linear" +# context_lengths = list([16000, 32000, 48000, 64000, 80000, 96000, 112000, 128000, 144000, 160000, 176000, 192000, 200000]) +context_lengths = [16000, 48000, 80000, 112000, 128000, 144000, 176000, 200000] +depths_list = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100] base_path = './data/needlebench' file_list = ['PaulGrahamEssays.jsonl'] @@ -62,9 +62,7 @@ needlebench_datasets_en = [] needle_file_name = 'needles.jsonl' for original_context_length in context_lengths: - for depth_percent in generate_depth_percents( - document_depth_percent_intervals, - document_depth_percent_interval_type): + for depth_percent in depths_list: dataset_dict = { 'abbr': f'Length{original_context_length}' f'Depth{int(depth_percent)}_origin_en_200k', @@ -90,9 +88,7 @@ needlebench_datasets_zh = [] needle_file_name = 'needles.jsonl' for original_context_length in context_lengths: - for depth_percent in generate_depth_percents( - document_depth_percent_intervals, - document_depth_percent_interval_type): + for depth_percent in depths_list: dataset_dict = { 'abbr': f'Length{original_context_length}' f'Depth{int(depth_percent)}_origin_zh_200k', diff --git a/configs/datasets/needlebench/needlebench_32k/needlebench_multi_reasoning.py b/configs/datasets/needlebench/needlebench_32k/needlebench_multi_reasoning.py index efb43206..fc7f4c4e 100644 --- a/configs/datasets/needlebench/needlebench_32k/needlebench_multi_reasoning.py +++ b/configs/datasets/needlebench/needlebench_32k/needlebench_multi_reasoning.py @@ -53,6 +53,7 @@ needlebench_eval_cfg = dict( pred_role='BOT') context_lengths = list([9000, 13000, 17000, 21000, 25000, 29000, 31000, 32000]) +depths_list = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100] document_depth_percent_intervals = 20 document_depth_percent_interval_type = "linear" @@ -67,9 +68,7 @@ needlebench_datasets_2needle_en = [] language = 'English' for original_context_length in context_lengths: - for depth_percent in generate_depth_percents( - document_depth_percent_intervals, - document_depth_percent_interval_type): + for depth_percent in depths_list: dataset_dict = { 'abbr': f'Length{original_context_length}' f'Depth{int(depth_percent)}_{num_needles}needle_en_32k', @@ -96,9 +95,7 @@ num_needles = 3 needlebench_datasets_3needle_en = [] for original_context_length in context_lengths: - for depth_percent in generate_depth_percents( - document_depth_percent_intervals, - document_depth_percent_interval_type): + for depth_percent in depths_list: dataset_dict = { 'abbr': f'Length{original_context_length}' f'Depth{int(depth_percent)}_{num_needles}needle_en_32k', @@ -125,9 +122,7 @@ num_needles = 4 needlebench_datasets_4needle_en = [] for original_context_length in context_lengths: - for depth_percent in generate_depth_percents( - document_depth_percent_intervals, - document_depth_percent_interval_type): + for depth_percent in depths_list: dataset_dict = { 'abbr': f'Length{original_context_length}' f'Depth{int(depth_percent)}_{num_needles}needle_en_32k', @@ -154,9 +149,7 @@ num_needles = 5 needlebench_datasets_5needle_en = [] for original_context_length in context_lengths: - for depth_percent in generate_depth_percents( - document_depth_percent_intervals, - document_depth_percent_interval_type): + for depth_percent in depths_list: dataset_dict = { 'abbr': f'Length{original_context_length}' f'Depth{int(depth_percent)}_{num_needles}needle_en_32k', @@ -190,9 +183,7 @@ needlebench_datasets_2needle_zh = [] language = 'Chinese' for original_context_length in context_lengths: - for depth_percent in generate_depth_percents( - document_depth_percent_intervals, - document_depth_percent_interval_type): + for depth_percent in depths_list: dataset_dict = { 'abbr': f'Length{original_context_length}' f'Depth{int(depth_percent)}_{num_needles}needle_zh_32k', @@ -219,9 +210,7 @@ num_needles = 3 needlebench_datasets_3needle_zh = [] for original_context_length in context_lengths: - for depth_percent in generate_depth_percents( - document_depth_percent_intervals, - document_depth_percent_interval_type): + for depth_percent in depths_list: dataset_dict = { 'abbr': f'Length{original_context_length}' f'Depth{int(depth_percent)}_{num_needles}needle_zh_32k', @@ -248,9 +237,7 @@ num_needles = 4 needlebench_datasets_4needle_zh = [] for original_context_length in context_lengths: - for depth_percent in generate_depth_percents( - document_depth_percent_intervals, - document_depth_percent_interval_type): + for depth_percent in depths_list: dataset_dict = { 'abbr': f'Length{original_context_length}' f'Depth{int(depth_percent)}_{num_needles}needle_zh_32k', @@ -277,9 +264,7 @@ num_needles = 5 needlebench_datasets_5needle_zh = [] for original_context_length in context_lengths: - for depth_percent in generate_depth_percents( - document_depth_percent_intervals, - document_depth_percent_interval_type): + for depth_percent in depths_list: dataset_dict = { 'abbr': f'Length{original_context_length}' f'Depth{int(depth_percent)}_{num_needles}needle_zh_32k', diff --git a/configs/datasets/needlebench/needlebench_32k/needlebench_multi_retrieval.py b/configs/datasets/needlebench/needlebench_32k/needlebench_multi_retrieval.py index b04cb952..9f5c416a 100644 --- a/configs/datasets/needlebench/needlebench_32k/needlebench_multi_retrieval.py +++ b/configs/datasets/needlebench/needlebench_32k/needlebench_multi_retrieval.py @@ -60,10 +60,7 @@ base_path = './data/needlebench' file_list = ['PaulGrahamEssays.jsonl'] needlebench_datasets_en = [] needle_file_name = 'needles.jsonl' -depths_float = generate_depth_percents( - document_depth_percent_intervals, - document_depth_percent_interval_type) -depths = [int(depth) for depth in depths_float] +depths = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100] for original_context_length in context_lengths: dataset_dict = { diff --git a/configs/datasets/needlebench/needlebench_32k/needlebench_single.py b/configs/datasets/needlebench/needlebench_32k/needlebench_single.py index 6b9eb3d4..5f837595 100644 --- a/configs/datasets/needlebench/needlebench_32k/needlebench_single.py +++ b/configs/datasets/needlebench/needlebench_32k/needlebench_single.py @@ -53,6 +53,7 @@ needlebench_eval_cfg = dict( pred_role='BOT') context_lengths = list([9000, 13000, 17000, 21000, 25000, 29000, 31000, 32000]) +depths_list = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100] document_depth_percent_intervals = 20 document_depth_percent_interval_type = "linear" @@ -62,9 +63,7 @@ needlebench_datasets_en = [] needle_file_name = 'needles.jsonl' for original_context_length in context_lengths: - for depth_percent in generate_depth_percents( - document_depth_percent_intervals, - document_depth_percent_interval_type): + for depth_percent in depths_list: dataset_dict = { 'abbr': f'Length{original_context_length}' f'Depth{int(depth_percent)}_origin_en_32k', @@ -90,9 +89,7 @@ needlebench_datasets_zh = [] needle_file_name = 'needles.jsonl' for original_context_length in context_lengths: - for depth_percent in generate_depth_percents( - document_depth_percent_intervals, - document_depth_percent_interval_type): + for depth_percent in depths_list: dataset_dict = { 'abbr': f'Length{original_context_length}' f'Depth{int(depth_percent)}_origin_zh_32k', diff --git a/configs/summarizers/needlebench.py b/configs/summarizers/needlebench.py index 57f7b557..f31d54c2 100644 --- a/configs/summarizers/needlebench.py +++ b/configs/summarizers/needlebench.py @@ -4,6 +4,7 @@ from opencompass.summarizers.needlebench import NeedleBenchATCSummarizer # ----------NeedleBench-4k-summarizer---------- context_lengths_4k = list(range(1000, 5000, 1000)) depths = [0, 5, 10, 15, 21, 26, 31, 36, 42, 47, 52, 57, 63, 68, 73, 78, 84, 89, 94, 100] +depths_list_sparse = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100] # Initialize the lists _needlebench_4k_2needle_en = [] @@ -235,7 +236,7 @@ _needlebench_32k_origin_zh = [] # Fill the lists using nested loops for original_context_length in context_lengths_32k: - for depth_percent in depths: + for depth_percent in depths_list_sparse: _needlebench_32k_2needle_en.append(f'Length{original_context_length}Depth{int(depth_percent)}_2needle_en_32k') _needlebench_32k_3needle_en.append(f'Length{original_context_length}Depth{int(depth_percent)}_3needle_en_32k') _needlebench_32k_4needle_en.append(f'Length{original_context_length}Depth{int(depth_percent)}_4needle_en_32k') @@ -343,7 +344,7 @@ _needlebench_128k_origin_zh = [] # Fill the lists using nested loops for original_context_length in context_lengths_128k: - for depth_percent in depths: + for depth_percent in depths_list_sparse: _needlebench_128k_2needle_en.append(f'Length{original_context_length}Depth{int(depth_percent)}_2needle_en_128k') _needlebench_128k_3needle_en.append(f'Length{original_context_length}Depth{int(depth_percent)}_3needle_en_128k') _needlebench_128k_4needle_en.append(f'Length{original_context_length}Depth{int(depth_percent)}_4needle_en_128k') @@ -435,8 +436,7 @@ needlebench_128k_summarizer = dict( # ----------NeedleBench-200k-summarizer---------- -context_lengths_200k = list([16000, 32000, 48000, 64000, 80000, 96000, 112000, 128000, 144000, 160000, 176000, 192000, 200000]) - +context_lengths_200k = list([16000, 48000, 80000, 112000, 128000, 144000, 176000, 200000]) # Initialize the lists _needlebench_200k_2needle_en = [] _needlebench_200k_3needle_en = [] @@ -451,7 +451,7 @@ _needlebench_200k_origin_zh = [] # Fill the lists using nested loops for original_context_length in context_lengths_200k: - for depth_percent in depths: + for depth_percent in depths_list_sparse: _needlebench_200k_2needle_en.append(f'Length{original_context_length}Depth{int(depth_percent)}_2needle_en_200k') _needlebench_200k_3needle_en.append(f'Length{original_context_length}Depth{int(depth_percent)}_3needle_en_200k') _needlebench_200k_4needle_en.append(f'Length{original_context_length}Depth{int(depth_percent)}_4needle_en_200k')