mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Fix] Refactor Needlebench Configs for CLI Testing Support (#1020)
* add needlebench datasets suffix
* fix import
* update run.py args for summarizer key and dataset suffix
* update utils/run.py
This commit is contained in:
parent
2d4e559763
commit
b50d163265
@ -1,11 +0,0 @@
|
|||||||
from mmengine.config import read_base
|
|
||||||
|
|
||||||
with read_base():
|
|
||||||
from .needlebench_4k.needlebench import needlebench_datasets as needlebench_datasets_4k
|
|
||||||
from .needlebench_8k.needlebench import needlebench_datasets as needlebench_datasets_8k
|
|
||||||
from .needlebench_32k.needlebench import needlebench_datasets as needlebench_datasets_32k
|
|
||||||
from .needlebench_128k.needlebench import needlebench_datasets as needlebench_datasets_128k
|
|
||||||
from .needlebench_200k.needlebench import needlebench_datasets as needlebench_datasets_200k
|
|
||||||
from .needlebench_1000k.needlebench import needlebench_datasets as needlebench_datasets_1000k
|
|
||||||
|
|
||||||
needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
|
@ -1,18 +0,0 @@
|
|||||||
from mmengine.config import read_base
|
|
||||||
|
|
||||||
with read_base():
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_2needle_en as needlebench_multi_2needle_en_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_3needle_en as needlebench_multi_3needle_en_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_4needle_en as needlebench_multi_4needle_en_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_5needle_en as needlebench_multi_5needle_en_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_2needle_zh as needlebench_multi_2needle_zh_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_3needle_zh as needlebench_multi_3needle_zh_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_4needle_zh as needlebench_multi_4needle_zh_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_5needle_zh as needlebench_multi_5needle_zh_datasets
|
|
||||||
|
|
||||||
from .needlebench_single import needlebench_datasets_en as needlebench_origin_en_datasets
|
|
||||||
from .needlebench_single import needlebench_datasets_zh as needlebench_origin_zh_datasets
|
|
||||||
from .needlebench_multi_retrieval import needlebench_datasets_en as needlebench_parallel_en_datasets
|
|
||||||
from .needlebench_multi_retrieval import needlebench_datasets_zh as needlebench_parallel_zh_datasets
|
|
||||||
|
|
||||||
needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
|
@ -0,0 +1,18 @@
|
|||||||
|
from mmengine.config import read_base
|
||||||
|
|
||||||
|
with read_base():
|
||||||
|
from .needlebench_multi_reasoning_1000k import needlebench_2needle_en_datasets as needlebench_multi_2needle_en_datasets
|
||||||
|
from .needlebench_multi_reasoning_1000k import needlebench_3needle_en_datasets as needlebench_multi_3needle_en_datasets
|
||||||
|
from .needlebench_multi_reasoning_1000k import needlebench_4needle_en_datasets as needlebench_multi_4needle_en_datasets
|
||||||
|
from .needlebench_multi_reasoning_1000k import needlebench_5needle_en_datasets as needlebench_multi_5needle_en_datasets
|
||||||
|
from .needlebench_multi_reasoning_1000k import needlebench_2needle_zh_datasets as needlebench_multi_2needle_zh_datasets
|
||||||
|
from .needlebench_multi_reasoning_1000k import needlebench_3needle_zh_datasets as needlebench_multi_3needle_zh_datasets
|
||||||
|
from .needlebench_multi_reasoning_1000k import needlebench_4needle_zh_datasets as needlebench_multi_4needle_zh_datasets
|
||||||
|
from .needlebench_multi_reasoning_1000k import needlebench_5needle_zh_datasets as needlebench_multi_5needle_zh_datasets
|
||||||
|
|
||||||
|
from .needlebench_single_1000k import needlebench_en_datasets as needlebench_origin_en_datasets
|
||||||
|
from .needlebench_single_1000k import needlebench_zh_datasets as needlebench_origin_zh_datasets
|
||||||
|
from .needlebench_multi_retrieval_1000k import needlebench_en_datasets as needlebench_parallel_en_datasets
|
||||||
|
from .needlebench_multi_retrieval_1000k import needlebench_zh_datasets as needlebench_parallel_zh_datasets
|
||||||
|
|
||||||
|
needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
@ -62,7 +62,7 @@ file_list = ['PaulGrahamEssays.jsonl']
|
|||||||
needle_file_name = 'multi_needle_reasoning_en.json'
|
needle_file_name = 'multi_needle_reasoning_en.json'
|
||||||
diff = 10
|
diff = 10
|
||||||
num_needles = 2
|
num_needles = 2
|
||||||
needlebench_datasets_2needle_en = []
|
needlebench_2needle_en_datasets = []
|
||||||
language = 'English'
|
language = 'English'
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
@ -87,10 +87,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_2needle_en.append(dataset_dict)
|
needlebench_2needle_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 3
|
num_needles = 3
|
||||||
needlebench_datasets_3needle_en = []
|
needlebench_3needle_en_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in depths_list:
|
for depth_percent in depths_list:
|
||||||
@ -114,10 +114,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_3needle_en.append(dataset_dict)
|
needlebench_3needle_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 4
|
num_needles = 4
|
||||||
needlebench_datasets_4needle_en = []
|
needlebench_4needle_en_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in depths_list:
|
for depth_percent in depths_list:
|
||||||
@ -141,10 +141,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_4needle_en.append(dataset_dict)
|
needlebench_4needle_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 5
|
num_needles = 5
|
||||||
needlebench_datasets_5needle_en = []
|
needlebench_5needle_en_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in depths_list:
|
for depth_percent in depths_list:
|
||||||
@ -168,7 +168,7 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_5needle_en.append(dataset_dict)
|
needlebench_5needle_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
# ----------Chinese Version----------
|
# ----------Chinese Version----------
|
||||||
base_path = './data/needlebench'
|
base_path = './data/needlebench'
|
||||||
@ -177,7 +177,7 @@ file_list = ['zh_finance.jsonl']
|
|||||||
needle_file_name = 'multi_needle_reasoning_zh.json'
|
needle_file_name = 'multi_needle_reasoning_zh.json'
|
||||||
diff = 10
|
diff = 10
|
||||||
num_needles = 2
|
num_needles = 2
|
||||||
needlebench_datasets_2needle_zh = []
|
needlebench_2needle_zh_datasets = []
|
||||||
language = 'Chinese'
|
language = 'Chinese'
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
@ -202,10 +202,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_2needle_zh.append(dataset_dict)
|
needlebench_2needle_zh_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 3
|
num_needles = 3
|
||||||
needlebench_datasets_3needle_zh = []
|
needlebench_3needle_zh_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in depths_list:
|
for depth_percent in depths_list:
|
||||||
@ -229,10 +229,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_3needle_zh.append(dataset_dict)
|
needlebench_3needle_zh_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 4
|
num_needles = 4
|
||||||
needlebench_datasets_4needle_zh = []
|
needlebench_4needle_zh_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in depths_list:
|
for depth_percent in depths_list:
|
||||||
@ -256,10 +256,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_4needle_zh.append(dataset_dict)
|
needlebench_4needle_zh_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 5
|
num_needles = 5
|
||||||
needlebench_datasets_5needle_zh = []
|
needlebench_5needle_zh_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in depths_list:
|
for depth_percent in depths_list:
|
||||||
@ -283,4 +283,4 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_5needle_zh.append(dataset_dict)
|
needlebench_5needle_zh_datasets.append(dataset_dict)
|
@ -58,7 +58,7 @@ document_depth_percent_interval_type = "linear"
|
|||||||
|
|
||||||
base_path = './data/needlebench'
|
base_path = './data/needlebench'
|
||||||
file_list = ['PaulGrahamEssays.jsonl']
|
file_list = ['PaulGrahamEssays.jsonl']
|
||||||
needlebench_datasets_en = []
|
needlebench_en_datasets = []
|
||||||
needle_file_name = 'needles.jsonl'
|
needle_file_name = 'needles.jsonl'
|
||||||
depths = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100]
|
depths = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100]
|
||||||
|
|
||||||
@ -81,10 +81,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_en.append(dataset_dict)
|
needlebench_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
file_list = ['zh_finance.jsonl']
|
file_list = ['zh_finance.jsonl']
|
||||||
needlebench_datasets_zh = []
|
needlebench_zh_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
dataset_dict = {
|
dataset_dict = {
|
||||||
@ -105,4 +105,4 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_zh.append(dataset_dict)
|
needlebench_zh_datasets.append(dataset_dict)
|
@ -57,7 +57,7 @@ depths_list = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100]
|
|||||||
|
|
||||||
base_path = './data/needlebench'
|
base_path = './data/needlebench'
|
||||||
file_list = ['PaulGrahamEssays.jsonl']
|
file_list = ['PaulGrahamEssays.jsonl']
|
||||||
needlebench_datasets_en = []
|
needlebench_en_datasets = []
|
||||||
needle_file_name = 'needles.jsonl'
|
needle_file_name = 'needles.jsonl'
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
@ -80,10 +80,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_en.append(dataset_dict)
|
needlebench_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
file_list = ['zh_finance.jsonl']
|
file_list = ['zh_finance.jsonl']
|
||||||
needlebench_datasets_zh = []
|
needlebench_zh_datasets = []
|
||||||
needle_file_name = 'needles.jsonl'
|
needle_file_name = 'needles.jsonl'
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
@ -106,4 +106,4 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_zh.append(dataset_dict)
|
needlebench_zh_datasets.append(dataset_dict)
|
@ -1,18 +0,0 @@
|
|||||||
from mmengine.config import read_base
|
|
||||||
|
|
||||||
with read_base():
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_2needle_en as needlebench_multi_2needle_en_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_3needle_en as needlebench_multi_3needle_en_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_4needle_en as needlebench_multi_4needle_en_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_5needle_en as needlebench_multi_5needle_en_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_2needle_zh as needlebench_multi_2needle_zh_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_3needle_zh as needlebench_multi_3needle_zh_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_4needle_zh as needlebench_multi_4needle_zh_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_5needle_zh as needlebench_multi_5needle_zh_datasets
|
|
||||||
|
|
||||||
from .needlebench_single import needlebench_datasets_en as needlebench_origin_en_datasets
|
|
||||||
from .needlebench_single import needlebench_datasets_zh as needlebench_origin_zh_datasets
|
|
||||||
from .needlebench_multi_retrieval import needlebench_datasets_en as needlebench_parallel_en_datasets
|
|
||||||
from .needlebench_multi_retrieval import needlebench_datasets_zh as needlebench_parallel_zh_datasets
|
|
||||||
|
|
||||||
needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
|
@ -0,0 +1,18 @@
|
|||||||
|
from mmengine.config import read_base
|
||||||
|
|
||||||
|
with read_base():
|
||||||
|
from .needlebench_multi_reasoning_128k import needlebench_2needle_en_datasets as needlebench_multi_2needle_en_datasets
|
||||||
|
from .needlebench_multi_reasoning_128k import needlebench_3needle_en_datasets as needlebench_multi_3needle_en_datasets
|
||||||
|
from .needlebench_multi_reasoning_128k import needlebench_4needle_en_datasets as needlebench_multi_4needle_en_datasets
|
||||||
|
from .needlebench_multi_reasoning_128k import needlebench_5needle_en_datasets as needlebench_multi_5needle_en_datasets
|
||||||
|
from .needlebench_multi_reasoning_128k import needlebench_2needle_zh_datasets as needlebench_multi_2needle_zh_datasets
|
||||||
|
from .needlebench_multi_reasoning_128k import needlebench_3needle_zh_datasets as needlebench_multi_3needle_zh_datasets
|
||||||
|
from .needlebench_multi_reasoning_128k import needlebench_4needle_zh_datasets as needlebench_multi_4needle_zh_datasets
|
||||||
|
from .needlebench_multi_reasoning_128k import needlebench_5needle_zh_datasets as needlebench_multi_5needle_zh_datasets
|
||||||
|
|
||||||
|
from .needlebench_single_128k import needlebench_en_datasets as needlebench_origin_en_datasets
|
||||||
|
from .needlebench_single_128k import needlebench_zh_datasets as needlebench_origin_zh_datasets
|
||||||
|
from .needlebench_multi_retrieval_128k import needlebench_en_datasets as needlebench_parallel_en_datasets
|
||||||
|
from .needlebench_multi_retrieval_128k import needlebench_zh_datasets as needlebench_parallel_zh_datasets
|
||||||
|
|
||||||
|
needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
@ -64,7 +64,7 @@ file_list = ['PaulGrahamEssays.jsonl']
|
|||||||
needle_file_name = 'multi_needle_reasoning_en.json'
|
needle_file_name = 'multi_needle_reasoning_en.json'
|
||||||
diff = 10
|
diff = 10
|
||||||
num_needles = 2
|
num_needles = 2
|
||||||
needlebench_datasets_2needle_en = []
|
needlebench_2needle_en_datasets = []
|
||||||
language = 'English'
|
language = 'English'
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
@ -89,10 +89,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_2needle_en.append(dataset_dict)
|
needlebench_2needle_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 3
|
num_needles = 3
|
||||||
needlebench_datasets_3needle_en = []
|
needlebench_3needle_en_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in depths_list:
|
for depth_percent in depths_list:
|
||||||
@ -116,10 +116,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_3needle_en.append(dataset_dict)
|
needlebench_3needle_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 4
|
num_needles = 4
|
||||||
needlebench_datasets_4needle_en = []
|
needlebench_4needle_en_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in depths_list:
|
for depth_percent in depths_list:
|
||||||
@ -143,10 +143,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_4needle_en.append(dataset_dict)
|
needlebench_4needle_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 5
|
num_needles = 5
|
||||||
needlebench_datasets_5needle_en = []
|
needlebench_5needle_en_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in depths_list:
|
for depth_percent in depths_list:
|
||||||
@ -170,7 +170,7 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_5needle_en.append(dataset_dict)
|
needlebench_5needle_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
# ----------Chinese Version----------
|
# ----------Chinese Version----------
|
||||||
base_path = './data/needlebench'
|
base_path = './data/needlebench'
|
||||||
@ -179,7 +179,7 @@ file_list = ['zh_finance.jsonl']
|
|||||||
needle_file_name = 'multi_needle_reasoning_zh.json'
|
needle_file_name = 'multi_needle_reasoning_zh.json'
|
||||||
diff = 10
|
diff = 10
|
||||||
num_needles = 2
|
num_needles = 2
|
||||||
needlebench_datasets_2needle_zh = []
|
needlebench_2needle_zh_datasets = []
|
||||||
language = 'Chinese'
|
language = 'Chinese'
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
@ -204,10 +204,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_2needle_zh.append(dataset_dict)
|
needlebench_2needle_zh_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 3
|
num_needles = 3
|
||||||
needlebench_datasets_3needle_zh = []
|
needlebench_3needle_zh_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in depths_list:
|
for depth_percent in depths_list:
|
||||||
@ -231,10 +231,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_3needle_zh.append(dataset_dict)
|
needlebench_3needle_zh_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 4
|
num_needles = 4
|
||||||
needlebench_datasets_4needle_zh = []
|
needlebench_4needle_zh_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in depths_list:
|
for depth_percent in depths_list:
|
||||||
@ -258,10 +258,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_4needle_zh.append(dataset_dict)
|
needlebench_4needle_zh_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 5
|
num_needles = 5
|
||||||
needlebench_datasets_5needle_zh = []
|
needlebench_5needle_zh_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in depths_list:
|
for depth_percent in depths_list:
|
||||||
@ -285,4 +285,4 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_5needle_zh.append(dataset_dict)
|
needlebench_5needle_zh_datasets.append(dataset_dict)
|
@ -58,7 +58,7 @@ document_depth_percent_interval_type = "linear"
|
|||||||
|
|
||||||
base_path = './data/needlebench'
|
base_path = './data/needlebench'
|
||||||
file_list = ['PaulGrahamEssays.jsonl']
|
file_list = ['PaulGrahamEssays.jsonl']
|
||||||
needlebench_datasets_en = []
|
needlebench_en_datasets = []
|
||||||
needle_file_name = 'needles.jsonl'
|
needle_file_name = 'needles.jsonl'
|
||||||
depths = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100]
|
depths = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100]
|
||||||
|
|
||||||
@ -81,10 +81,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_en.append(dataset_dict)
|
needlebench_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
file_list = ['zh_finance.jsonl']
|
file_list = ['zh_finance.jsonl']
|
||||||
needlebench_datasets_zh = []
|
needlebench_zh_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
dataset_dict = {
|
dataset_dict = {
|
||||||
@ -105,4 +105,4 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_zh.append(dataset_dict)
|
needlebench_zh_datasets.append(dataset_dict)
|
@ -59,7 +59,7 @@ document_depth_percent_interval_type = "linear"
|
|||||||
|
|
||||||
base_path = './data/needlebench'
|
base_path = './data/needlebench'
|
||||||
file_list = ['PaulGrahamEssays.jsonl']
|
file_list = ['PaulGrahamEssays.jsonl']
|
||||||
needlebench_datasets_en = []
|
needlebench_en_datasets = []
|
||||||
needle_file_name = 'needles.jsonl'
|
needle_file_name = 'needles.jsonl'
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
@ -82,10 +82,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_en.append(dataset_dict)
|
needlebench_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
file_list = ['zh_finance.jsonl']
|
file_list = ['zh_finance.jsonl']
|
||||||
needlebench_datasets_zh = []
|
needlebench_zh_datasets = []
|
||||||
needle_file_name = 'needles.jsonl'
|
needle_file_name = 'needles.jsonl'
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
@ -108,4 +108,4 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_zh.append(dataset_dict)
|
needlebench_zh_datasets.append(dataset_dict)
|
@ -1,18 +0,0 @@
|
|||||||
from mmengine.config import read_base
|
|
||||||
|
|
||||||
with read_base():
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_2needle_en as needlebench_multi_2needle_en_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_3needle_en as needlebench_multi_3needle_en_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_4needle_en as needlebench_multi_4needle_en_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_5needle_en as needlebench_multi_5needle_en_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_2needle_zh as needlebench_multi_2needle_zh_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_3needle_zh as needlebench_multi_3needle_zh_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_4needle_zh as needlebench_multi_4needle_zh_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_5needle_zh as needlebench_multi_5needle_zh_datasets
|
|
||||||
|
|
||||||
from .needlebench_single import needlebench_datasets_en as needlebench_origin_en_datasets
|
|
||||||
from .needlebench_single import needlebench_datasets_zh as needlebench_origin_zh_datasets
|
|
||||||
from .needlebench_multi_retrieval import needlebench_datasets_en as needlebench_parallel_en_datasets
|
|
||||||
from .needlebench_multi_retrieval import needlebench_datasets_zh as needlebench_parallel_zh_datasets
|
|
||||||
|
|
||||||
needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
|
@ -0,0 +1,18 @@
|
|||||||
|
from mmengine.config import read_base
|
||||||
|
|
||||||
|
with read_base():
|
||||||
|
from .needlebench_multi_reasoning_200k import needlebench_2needle_en_datasets as needlebench_multi_2needle_en_datasets
|
||||||
|
from .needlebench_multi_reasoning_200k import needlebench_3needle_en_datasets as needlebench_multi_3needle_en_datasets
|
||||||
|
from .needlebench_multi_reasoning_200k import needlebench_4needle_en_datasets as needlebench_multi_4needle_en_datasets
|
||||||
|
from .needlebench_multi_reasoning_200k import needlebench_5needle_en_datasets as needlebench_multi_5needle_en_datasets
|
||||||
|
from .needlebench_multi_reasoning_200k import needlebench_2needle_zh_datasets as needlebench_multi_2needle_zh_datasets
|
||||||
|
from .needlebench_multi_reasoning_200k import needlebench_3needle_zh_datasets as needlebench_multi_3needle_zh_datasets
|
||||||
|
from .needlebench_multi_reasoning_200k import needlebench_4needle_zh_datasets as needlebench_multi_4needle_zh_datasets
|
||||||
|
from .needlebench_multi_reasoning_200k import needlebench_5needle_zh_datasets as needlebench_multi_5needle_zh_datasets
|
||||||
|
|
||||||
|
from .needlebench_single_200k import needlebench_en_datasets as needlebench_origin_en_datasets
|
||||||
|
from .needlebench_single_200k import needlebench_zh_datasets as needlebench_origin_zh_datasets
|
||||||
|
from .needlebench_multi_retrieval_200k import needlebench_en_datasets as needlebench_parallel_en_datasets
|
||||||
|
from .needlebench_multi_retrieval_200k import needlebench_zh_datasets as needlebench_parallel_zh_datasets
|
||||||
|
|
||||||
|
needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
@ -63,7 +63,7 @@ file_list = ['PaulGrahamEssays.jsonl']
|
|||||||
needle_file_name = 'multi_needle_reasoning_en.json'
|
needle_file_name = 'multi_needle_reasoning_en.json'
|
||||||
diff = 10
|
diff = 10
|
||||||
num_needles = 2
|
num_needles = 2
|
||||||
needlebench_datasets_2needle_en = []
|
needlebench_2needle_en_datasets = []
|
||||||
language = 'English'
|
language = 'English'
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
@ -88,10 +88,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_2needle_en.append(dataset_dict)
|
needlebench_2needle_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 3
|
num_needles = 3
|
||||||
needlebench_datasets_3needle_en = []
|
needlebench_3needle_en_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in depths_list:
|
for depth_percent in depths_list:
|
||||||
@ -115,10 +115,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_3needle_en.append(dataset_dict)
|
needlebench_3needle_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 4
|
num_needles = 4
|
||||||
needlebench_datasets_4needle_en = []
|
needlebench_4needle_en_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in depths_list:
|
for depth_percent in depths_list:
|
||||||
@ -142,10 +142,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_4needle_en.append(dataset_dict)
|
needlebench_4needle_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 5
|
num_needles = 5
|
||||||
needlebench_datasets_5needle_en = []
|
needlebench_5needle_en_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in depths_list:
|
for depth_percent in depths_list:
|
||||||
@ -169,7 +169,7 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_5needle_en.append(dataset_dict)
|
needlebench_5needle_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
# ----------Chinese Version----------
|
# ----------Chinese Version----------
|
||||||
base_path = './data/needlebench'
|
base_path = './data/needlebench'
|
||||||
@ -178,7 +178,7 @@ file_list = ['zh_finance.jsonl']
|
|||||||
needle_file_name = 'multi_needle_reasoning_zh.json'
|
needle_file_name = 'multi_needle_reasoning_zh.json'
|
||||||
diff = 10
|
diff = 10
|
||||||
num_needles = 2
|
num_needles = 2
|
||||||
needlebench_datasets_2needle_zh = []
|
needlebench_2needle_zh_datasets = []
|
||||||
language = 'Chinese'
|
language = 'Chinese'
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
@ -203,10 +203,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_2needle_zh.append(dataset_dict)
|
needlebench_2needle_zh_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 3
|
num_needles = 3
|
||||||
needlebench_datasets_3needle_zh = []
|
needlebench_3needle_zh_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in depths_list:
|
for depth_percent in depths_list:
|
||||||
@ -230,10 +230,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_3needle_zh.append(dataset_dict)
|
needlebench_3needle_zh_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 4
|
num_needles = 4
|
||||||
needlebench_datasets_4needle_zh = []
|
needlebench_4needle_zh_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in depths_list:
|
for depth_percent in depths_list:
|
||||||
@ -257,10 +257,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_4needle_zh.append(dataset_dict)
|
needlebench_4needle_zh_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 5
|
num_needles = 5
|
||||||
needlebench_datasets_5needle_zh = []
|
needlebench_5needle_zh_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in depths_list:
|
for depth_percent in depths_list:
|
||||||
@ -284,4 +284,4 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_5needle_zh.append(dataset_dict)
|
needlebench_5needle_zh_datasets.append(dataset_dict)
|
@ -59,7 +59,7 @@ document_depth_percent_interval_type = "linear"
|
|||||||
|
|
||||||
base_path = './data/needlebench'
|
base_path = './data/needlebench'
|
||||||
file_list = ['PaulGrahamEssays.jsonl']
|
file_list = ['PaulGrahamEssays.jsonl']
|
||||||
needlebench_datasets_en = []
|
needlebench_en_datasets = []
|
||||||
needle_file_name = 'needles.jsonl'
|
needle_file_name = 'needles.jsonl'
|
||||||
depths = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100]
|
depths = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100]
|
||||||
|
|
||||||
@ -82,10 +82,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_en.append(dataset_dict)
|
needlebench_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
file_list = ['zh_finance.jsonl']
|
file_list = ['zh_finance.jsonl']
|
||||||
needlebench_datasets_zh = []
|
needlebench_zh_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
dataset_dict = {
|
dataset_dict = {
|
||||||
@ -106,4 +106,4 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_zh.append(dataset_dict)
|
needlebench_zh_datasets.append(dataset_dict)
|
@ -58,7 +58,7 @@ depths_list = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100]
|
|||||||
|
|
||||||
base_path = './data/needlebench'
|
base_path = './data/needlebench'
|
||||||
file_list = ['PaulGrahamEssays.jsonl']
|
file_list = ['PaulGrahamEssays.jsonl']
|
||||||
needlebench_datasets_en = []
|
needlebench_en_datasets = []
|
||||||
needle_file_name = 'needles.jsonl'
|
needle_file_name = 'needles.jsonl'
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
@ -81,10 +81,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_en.append(dataset_dict)
|
needlebench_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
file_list = ['zh_finance.jsonl']
|
file_list = ['zh_finance.jsonl']
|
||||||
needlebench_datasets_zh = []
|
needlebench_zh_datasets = []
|
||||||
needle_file_name = 'needles.jsonl'
|
needle_file_name = 'needles.jsonl'
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
@ -107,4 +107,4 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_zh.append(dataset_dict)
|
needlebench_zh_datasets.append(dataset_dict)
|
@ -1,18 +0,0 @@
|
|||||||
from mmengine.config import read_base
|
|
||||||
|
|
||||||
with read_base():
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_2needle_en as needlebench_multi_2needle_en_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_3needle_en as needlebench_multi_3needle_en_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_4needle_en as needlebench_multi_4needle_en_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_5needle_en as needlebench_multi_5needle_en_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_2needle_zh as needlebench_multi_2needle_zh_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_3needle_zh as needlebench_multi_3needle_zh_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_4needle_zh as needlebench_multi_4needle_zh_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_5needle_zh as needlebench_multi_5needle_zh_datasets
|
|
||||||
|
|
||||||
from .needlebench_single import needlebench_datasets_en as needlebench_origin_en_datasets
|
|
||||||
from .needlebench_single import needlebench_datasets_zh as needlebench_origin_zh_datasets
|
|
||||||
from .needlebench_multi_retrieval import needlebench_datasets_en as needlebench_parallel_en_datasets
|
|
||||||
from .needlebench_multi_retrieval import needlebench_datasets_zh as needlebench_parallel_zh_datasets
|
|
||||||
|
|
||||||
needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
|
@ -0,0 +1,18 @@
|
|||||||
|
from mmengine.config import read_base
|
||||||
|
|
||||||
|
with read_base():
|
||||||
|
from .needlebench_multi_reasoning_32k import needlebench_2needle_en_datasets as needlebench_multi_2needle_en_datasets
|
||||||
|
from .needlebench_multi_reasoning_32k import needlebench_3needle_en_datasets as needlebench_multi_3needle_en_datasets
|
||||||
|
from .needlebench_multi_reasoning_32k import needlebench_4needle_en_datasets as needlebench_multi_4needle_en_datasets
|
||||||
|
from .needlebench_multi_reasoning_32k import needlebench_5needle_en_datasets as needlebench_multi_5needle_en_datasets
|
||||||
|
from .needlebench_multi_reasoning_32k import needlebench_2needle_zh_datasets as needlebench_multi_2needle_zh_datasets
|
||||||
|
from .needlebench_multi_reasoning_32k import needlebench_3needle_zh_datasets as needlebench_multi_3needle_zh_datasets
|
||||||
|
from .needlebench_multi_reasoning_32k import needlebench_4needle_zh_datasets as needlebench_multi_4needle_zh_datasets
|
||||||
|
from .needlebench_multi_reasoning_32k import needlebench_5needle_zh_datasets as needlebench_multi_5needle_zh_datasets
|
||||||
|
|
||||||
|
from .needlebench_single_32k import needlebench_en_datasets as needlebench_origin_en_datasets
|
||||||
|
from .needlebench_single_32k import needlebench_zh_datasets as needlebench_origin_zh_datasets
|
||||||
|
from .needlebench_multi_retrieval_32k import needlebench_en_datasets as needlebench_parallel_en_datasets
|
||||||
|
from .needlebench_multi_retrieval_32k import needlebench_zh_datasets as needlebench_parallel_zh_datasets
|
||||||
|
|
||||||
|
needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
@ -64,7 +64,7 @@ file_list = ['PaulGrahamEssays.jsonl']
|
|||||||
needle_file_name = 'multi_needle_reasoning_en.json'
|
needle_file_name = 'multi_needle_reasoning_en.json'
|
||||||
diff = 10
|
diff = 10
|
||||||
num_needles = 2
|
num_needles = 2
|
||||||
needlebench_datasets_2needle_en = []
|
needlebench_2needle_en_datasets = []
|
||||||
language = 'English'
|
language = 'English'
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
@ -89,10 +89,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_2needle_en.append(dataset_dict)
|
needlebench_2needle_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 3
|
num_needles = 3
|
||||||
needlebench_datasets_3needle_en = []
|
needlebench_3needle_en_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in depths_list:
|
for depth_percent in depths_list:
|
||||||
@ -116,10 +116,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_3needle_en.append(dataset_dict)
|
needlebench_3needle_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 4
|
num_needles = 4
|
||||||
needlebench_datasets_4needle_en = []
|
needlebench_4needle_en_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in depths_list:
|
for depth_percent in depths_list:
|
||||||
@ -143,10 +143,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_4needle_en.append(dataset_dict)
|
needlebench_4needle_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 5
|
num_needles = 5
|
||||||
needlebench_datasets_5needle_en = []
|
needlebench_5needle_en_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in depths_list:
|
for depth_percent in depths_list:
|
||||||
@ -170,7 +170,7 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_5needle_en.append(dataset_dict)
|
needlebench_5needle_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
# ----------Chinese Version----------
|
# ----------Chinese Version----------
|
||||||
base_path = './data/needlebench'
|
base_path = './data/needlebench'
|
||||||
@ -179,7 +179,7 @@ file_list = ['zh_finance.jsonl']
|
|||||||
needle_file_name = 'multi_needle_reasoning_zh.json'
|
needle_file_name = 'multi_needle_reasoning_zh.json'
|
||||||
diff = 10
|
diff = 10
|
||||||
num_needles = 2
|
num_needles = 2
|
||||||
needlebench_datasets_2needle_zh = []
|
needlebench_2needle_zh_datasets = []
|
||||||
language = 'Chinese'
|
language = 'Chinese'
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
@ -204,10 +204,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_2needle_zh.append(dataset_dict)
|
needlebench_2needle_zh_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 3
|
num_needles = 3
|
||||||
needlebench_datasets_3needle_zh = []
|
needlebench_3needle_zh_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in depths_list:
|
for depth_percent in depths_list:
|
||||||
@ -231,10 +231,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_3needle_zh.append(dataset_dict)
|
needlebench_3needle_zh_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 4
|
num_needles = 4
|
||||||
needlebench_datasets_4needle_zh = []
|
needlebench_4needle_zh_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in depths_list:
|
for depth_percent in depths_list:
|
||||||
@ -258,10 +258,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_4needle_zh.append(dataset_dict)
|
needlebench_4needle_zh_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 5
|
num_needles = 5
|
||||||
needlebench_datasets_5needle_zh = []
|
needlebench_5needle_zh_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in depths_list:
|
for depth_percent in depths_list:
|
||||||
@ -285,4 +285,4 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_5needle_zh.append(dataset_dict)
|
needlebench_5needle_zh_datasets.append(dataset_dict)
|
@ -58,7 +58,7 @@ document_depth_percent_interval_type = "linear"
|
|||||||
|
|
||||||
base_path = './data/needlebench'
|
base_path = './data/needlebench'
|
||||||
file_list = ['PaulGrahamEssays.jsonl']
|
file_list = ['PaulGrahamEssays.jsonl']
|
||||||
needlebench_datasets_en = []
|
needlebench_en_datasets = []
|
||||||
needle_file_name = 'needles.jsonl'
|
needle_file_name = 'needles.jsonl'
|
||||||
depths = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100]
|
depths = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100]
|
||||||
|
|
||||||
@ -81,10 +81,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_en.append(dataset_dict)
|
needlebench_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
file_list = ['zh_finance.jsonl']
|
file_list = ['zh_finance.jsonl']
|
||||||
needlebench_datasets_zh = []
|
needlebench_zh_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
dataset_dict = {
|
dataset_dict = {
|
||||||
@ -105,4 +105,4 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_zh.append(dataset_dict)
|
needlebench_zh_datasets.append(dataset_dict)
|
@ -59,7 +59,7 @@ document_depth_percent_interval_type = "linear"
|
|||||||
|
|
||||||
base_path = './data/needlebench'
|
base_path = './data/needlebench'
|
||||||
file_list = ['PaulGrahamEssays.jsonl']
|
file_list = ['PaulGrahamEssays.jsonl']
|
||||||
needlebench_datasets_en = []
|
needlebench_en_datasets = []
|
||||||
needle_file_name = 'needles.jsonl'
|
needle_file_name = 'needles.jsonl'
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
@ -82,10 +82,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_en.append(dataset_dict)
|
needlebench_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
file_list = ['zh_finance.jsonl']
|
file_list = ['zh_finance.jsonl']
|
||||||
needlebench_datasets_zh = []
|
needlebench_zh_datasets = []
|
||||||
needle_file_name = 'needles.jsonl'
|
needle_file_name = 'needles.jsonl'
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
@ -108,4 +108,4 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_zh.append(dataset_dict)
|
needlebench_zh_datasets.append(dataset_dict)
|
@ -1,18 +0,0 @@
|
|||||||
from mmengine.config import read_base
|
|
||||||
|
|
||||||
with read_base():
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_2needle_en as needlebench_multi_2needle_en_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_3needle_en as needlebench_multi_3needle_en_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_4needle_en as needlebench_multi_4needle_en_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_5needle_en as needlebench_multi_5needle_en_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_2needle_zh as needlebench_multi_2needle_zh_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_3needle_zh as needlebench_multi_3needle_zh_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_4needle_zh as needlebench_multi_4needle_zh_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_5needle_zh as needlebench_multi_5needle_zh_datasets
|
|
||||||
|
|
||||||
from .needlebench_single import needlebench_datasets_en as needlebench_origin_en_datasets
|
|
||||||
from .needlebench_single import needlebench_datasets_zh as needlebench_origin_zh_datasets
|
|
||||||
from .needlebench_multi_retrieval import needlebench_datasets_en as needlebench_parallel_en_datasets
|
|
||||||
from .needlebench_multi_retrieval import needlebench_datasets_zh as needlebench_parallel_zh_datasets
|
|
||||||
|
|
||||||
needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
|
@ -0,0 +1,18 @@
|
|||||||
|
from mmengine.config import read_base
|
||||||
|
|
||||||
|
with read_base():
|
||||||
|
from .needlebench_multi_reasoning_4k import needlebench_2needle_en_datasets as needlebench_multi_2needle_en_datasets
|
||||||
|
from .needlebench_multi_reasoning_4k import needlebench_3needle_en_datasets as needlebench_multi_3needle_en_datasets
|
||||||
|
from .needlebench_multi_reasoning_4k import needlebench_4needle_en_datasets as needlebench_multi_4needle_en_datasets
|
||||||
|
from .needlebench_multi_reasoning_4k import needlebench_5needle_en_datasets as needlebench_multi_5needle_en_datasets
|
||||||
|
from .needlebench_multi_reasoning_4k import needlebench_2needle_zh_datasets as needlebench_multi_2needle_zh_datasets
|
||||||
|
from .needlebench_multi_reasoning_4k import needlebench_3needle_zh_datasets as needlebench_multi_3needle_zh_datasets
|
||||||
|
from .needlebench_multi_reasoning_4k import needlebench_4needle_zh_datasets as needlebench_multi_4needle_zh_datasets
|
||||||
|
from .needlebench_multi_reasoning_4k import needlebench_5needle_zh_datasets as needlebench_multi_5needle_zh_datasets
|
||||||
|
|
||||||
|
from .needlebench_single_4k import needlebench_en_datasets as needlebench_origin_en_datasets
|
||||||
|
from .needlebench_single_4k import needlebench_zh_datasets as needlebench_origin_zh_datasets
|
||||||
|
from .needlebench_multi_retrieval_4k import needlebench_en_datasets as needlebench_parallel_en_datasets
|
||||||
|
from .needlebench_multi_retrieval_4k import needlebench_zh_datasets as needlebench_parallel_zh_datasets
|
||||||
|
|
||||||
|
needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
@ -63,7 +63,7 @@ file_list = ['PaulGrahamEssays.jsonl']
|
|||||||
needle_file_name = 'multi_needle_reasoning_en.json'
|
needle_file_name = 'multi_needle_reasoning_en.json'
|
||||||
diff = 10
|
diff = 10
|
||||||
num_needles = 2
|
num_needles = 2
|
||||||
needlebench_datasets_2needle_en = []
|
needlebench_2needle_en_datasets = []
|
||||||
language = 'English'
|
language = 'English'
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
@ -90,10 +90,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_2needle_en.append(dataset_dict)
|
needlebench_2needle_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 3
|
num_needles = 3
|
||||||
needlebench_datasets_3needle_en = []
|
needlebench_3needle_en_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in generate_depth_percents(
|
for depth_percent in generate_depth_percents(
|
||||||
@ -119,10 +119,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_3needle_en.append(dataset_dict)
|
needlebench_3needle_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 4
|
num_needles = 4
|
||||||
needlebench_datasets_4needle_en = []
|
needlebench_4needle_en_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in generate_depth_percents(
|
for depth_percent in generate_depth_percents(
|
||||||
@ -148,10 +148,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_4needle_en.append(dataset_dict)
|
needlebench_4needle_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 5
|
num_needles = 5
|
||||||
needlebench_datasets_5needle_en = []
|
needlebench_5needle_en_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in generate_depth_percents(
|
for depth_percent in generate_depth_percents(
|
||||||
@ -177,7 +177,7 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_5needle_en.append(dataset_dict)
|
needlebench_5needle_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
# ----------Chinese Version----------
|
# ----------Chinese Version----------
|
||||||
base_path = './data/needlebench'
|
base_path = './data/needlebench'
|
||||||
@ -186,7 +186,7 @@ file_list = ['zh_finance.jsonl']
|
|||||||
needle_file_name = 'multi_needle_reasoning_zh.json'
|
needle_file_name = 'multi_needle_reasoning_zh.json'
|
||||||
diff = 10
|
diff = 10
|
||||||
num_needles = 2
|
num_needles = 2
|
||||||
needlebench_datasets_2needle_zh = []
|
needlebench_2needle_zh_datasets = []
|
||||||
language = 'Chinese'
|
language = 'Chinese'
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
@ -213,10 +213,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_2needle_zh.append(dataset_dict)
|
needlebench_2needle_zh_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 3
|
num_needles = 3
|
||||||
needlebench_datasets_3needle_zh = []
|
needlebench_3needle_zh_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in generate_depth_percents(
|
for depth_percent in generate_depth_percents(
|
||||||
@ -242,10 +242,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_3needle_zh.append(dataset_dict)
|
needlebench_3needle_zh_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 4
|
num_needles = 4
|
||||||
needlebench_datasets_4needle_zh = []
|
needlebench_4needle_zh_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in generate_depth_percents(
|
for depth_percent in generate_depth_percents(
|
||||||
@ -271,10 +271,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_4needle_zh.append(dataset_dict)
|
needlebench_4needle_zh_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 5
|
num_needles = 5
|
||||||
needlebench_datasets_5needle_zh = []
|
needlebench_5needle_zh_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in generate_depth_percents(
|
for depth_percent in generate_depth_percents(
|
||||||
@ -300,4 +300,4 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_5needle_zh.append(dataset_dict)
|
needlebench_5needle_zh_datasets.append(dataset_dict)
|
@ -58,7 +58,7 @@ document_depth_percent_interval_type = "linear"
|
|||||||
|
|
||||||
base_path = './data/needlebench'
|
base_path = './data/needlebench'
|
||||||
file_list = ['PaulGrahamEssays.jsonl']
|
file_list = ['PaulGrahamEssays.jsonl']
|
||||||
needlebench_datasets_en = []
|
needlebench_en_datasets = []
|
||||||
needle_file_name = 'needles.jsonl'
|
needle_file_name = 'needles.jsonl'
|
||||||
depths_float = generate_depth_percents(
|
depths_float = generate_depth_percents(
|
||||||
document_depth_percent_intervals,
|
document_depth_percent_intervals,
|
||||||
@ -84,10 +84,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_en.append(dataset_dict)
|
needlebench_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
file_list = ['zh_finance.jsonl']
|
file_list = ['zh_finance.jsonl']
|
||||||
needlebench_datasets_zh = []
|
needlebench_zh_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
dataset_dict = {
|
dataset_dict = {
|
||||||
@ -108,4 +108,4 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_zh.append(dataset_dict)
|
needlebench_zh_datasets.append(dataset_dict)
|
@ -58,7 +58,7 @@ document_depth_percent_interval_type = "linear"
|
|||||||
|
|
||||||
base_path = './data/needlebench'
|
base_path = './data/needlebench'
|
||||||
file_list = ['PaulGrahamEssays.jsonl']
|
file_list = ['PaulGrahamEssays.jsonl']
|
||||||
needlebench_datasets_en = []
|
needlebench_en_datasets = []
|
||||||
needle_file_name = 'needles.jsonl'
|
needle_file_name = 'needles.jsonl'
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
@ -83,10 +83,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_en.append(dataset_dict)
|
needlebench_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
file_list = ['zh_finance.jsonl']
|
file_list = ['zh_finance.jsonl']
|
||||||
needlebench_datasets_zh = []
|
needlebench_zh_datasets = []
|
||||||
needle_file_name = 'needles.jsonl'
|
needle_file_name = 'needles.jsonl'
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
@ -111,4 +111,4 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_zh.append(dataset_dict)
|
needlebench_zh_datasets.append(dataset_dict)
|
@ -1,18 +0,0 @@
|
|||||||
from mmengine.config import read_base
|
|
||||||
|
|
||||||
with read_base():
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_2needle_en as needlebench_multi_2needle_en_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_3needle_en as needlebench_multi_3needle_en_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_4needle_en as needlebench_multi_4needle_en_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_5needle_en as needlebench_multi_5needle_en_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_2needle_zh as needlebench_multi_2needle_zh_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_3needle_zh as needlebench_multi_3needle_zh_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_4needle_zh as needlebench_multi_4needle_zh_datasets
|
|
||||||
from .needlebench_multi_reasoning import needlebench_datasets_5needle_zh as needlebench_multi_5needle_zh_datasets
|
|
||||||
|
|
||||||
from .needlebench_single import needlebench_datasets_en as needlebench_origin_en_datasets
|
|
||||||
from .needlebench_single import needlebench_datasets_zh as needlebench_origin_zh_datasets
|
|
||||||
from .needlebench_multi_retrieval import needlebench_datasets_en as needlebench_parallel_en_datasets
|
|
||||||
from .needlebench_multi_retrieval import needlebench_datasets_zh as needlebench_parallel_zh_datasets
|
|
||||||
|
|
||||||
needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
|
@ -0,0 +1,18 @@
|
|||||||
|
from mmengine.config import read_base
|
||||||
|
|
||||||
|
with read_base():
|
||||||
|
from .needlebench_multi_reasoning_8k import needlebench_2needle_en_datasets as needlebench_multi_2needle_en_datasets
|
||||||
|
from .needlebench_multi_reasoning_8k import needlebench_3needle_en_datasets as needlebench_multi_3needle_en_datasets
|
||||||
|
from .needlebench_multi_reasoning_8k import needlebench_4needle_en_datasets as needlebench_multi_4needle_en_datasets
|
||||||
|
from .needlebench_multi_reasoning_8k import needlebench_5needle_en_datasets as needlebench_multi_5needle_en_datasets
|
||||||
|
from .needlebench_multi_reasoning_8k import needlebench_2needle_zh_datasets as needlebench_multi_2needle_zh_datasets
|
||||||
|
from .needlebench_multi_reasoning_8k import needlebench_3needle_zh_datasets as needlebench_multi_3needle_zh_datasets
|
||||||
|
from .needlebench_multi_reasoning_8k import needlebench_4needle_zh_datasets as needlebench_multi_4needle_zh_datasets
|
||||||
|
from .needlebench_multi_reasoning_8k import needlebench_5needle_zh_datasets as needlebench_multi_5needle_zh_datasets
|
||||||
|
|
||||||
|
from .needlebench_single_8k import needlebench_en_datasets as needlebench_origin_en_datasets
|
||||||
|
from .needlebench_single_8k import needlebench_zh_datasets as needlebench_origin_zh_datasets
|
||||||
|
from .needlebench_multi_retrieval_8k import needlebench_en_datasets as needlebench_parallel_en_datasets
|
||||||
|
from .needlebench_multi_retrieval_8k import needlebench_zh_datasets as needlebench_parallel_zh_datasets
|
||||||
|
|
||||||
|
needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
@ -63,7 +63,7 @@ file_list = ['PaulGrahamEssays.jsonl']
|
|||||||
needle_file_name = 'multi_needle_reasoning_en.json'
|
needle_file_name = 'multi_needle_reasoning_en.json'
|
||||||
diff = 10
|
diff = 10
|
||||||
num_needles = 2
|
num_needles = 2
|
||||||
needlebench_datasets_2needle_en = []
|
needlebench_2needle_en_datasets = []
|
||||||
language = 'English'
|
language = 'English'
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
@ -90,10 +90,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_2needle_en.append(dataset_dict)
|
needlebench_2needle_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 3
|
num_needles = 3
|
||||||
needlebench_datasets_3needle_en = []
|
needlebench_3needle_en_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in generate_depth_percents(
|
for depth_percent in generate_depth_percents(
|
||||||
@ -119,10 +119,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_3needle_en.append(dataset_dict)
|
needlebench_3needle_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 4
|
num_needles = 4
|
||||||
needlebench_datasets_4needle_en = []
|
needlebench_4needle_en_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in generate_depth_percents(
|
for depth_percent in generate_depth_percents(
|
||||||
@ -148,10 +148,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_4needle_en.append(dataset_dict)
|
needlebench_4needle_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 5
|
num_needles = 5
|
||||||
needlebench_datasets_5needle_en = []
|
needlebench_5needle_en_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in generate_depth_percents(
|
for depth_percent in generate_depth_percents(
|
||||||
@ -177,7 +177,7 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_5needle_en.append(dataset_dict)
|
needlebench_5needle_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
# ----------Chinese Version----------
|
# ----------Chinese Version----------
|
||||||
base_path = './data/needlebench'
|
base_path = './data/needlebench'
|
||||||
@ -186,7 +186,7 @@ file_list = ['zh_finance.jsonl']
|
|||||||
needle_file_name = 'multi_needle_reasoning_zh.json'
|
needle_file_name = 'multi_needle_reasoning_zh.json'
|
||||||
diff = 10
|
diff = 10
|
||||||
num_needles = 2
|
num_needles = 2
|
||||||
needlebench_datasets_2needle_zh = []
|
needlebench_2needle_zh_datasets = []
|
||||||
language = 'Chinese'
|
language = 'Chinese'
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
@ -213,10 +213,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_2needle_zh.append(dataset_dict)
|
needlebench_2needle_zh_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 3
|
num_needles = 3
|
||||||
needlebench_datasets_3needle_zh = []
|
needlebench_3needle_zh_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in generate_depth_percents(
|
for depth_percent in generate_depth_percents(
|
||||||
@ -242,10 +242,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_3needle_zh.append(dataset_dict)
|
needlebench_3needle_zh_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 4
|
num_needles = 4
|
||||||
needlebench_datasets_4needle_zh = []
|
needlebench_4needle_zh_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in generate_depth_percents(
|
for depth_percent in generate_depth_percents(
|
||||||
@ -271,10 +271,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_4needle_zh.append(dataset_dict)
|
needlebench_4needle_zh_datasets.append(dataset_dict)
|
||||||
|
|
||||||
num_needles = 5
|
num_needles = 5
|
||||||
needlebench_datasets_5needle_zh = []
|
needlebench_5needle_zh_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
for depth_percent in generate_depth_percents(
|
for depth_percent in generate_depth_percents(
|
||||||
@ -300,4 +300,4 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_5needle_zh.append(dataset_dict)
|
needlebench_5needle_zh_datasets.append(dataset_dict)
|
@ -58,7 +58,7 @@ document_depth_percent_interval_type = "linear"
|
|||||||
|
|
||||||
base_path = './data/needlebench'
|
base_path = './data/needlebench'
|
||||||
file_list = ['PaulGrahamEssays.jsonl']
|
file_list = ['PaulGrahamEssays.jsonl']
|
||||||
needlebench_datasets_en = []
|
needlebench_en_datasets = []
|
||||||
needle_file_name = 'needles.jsonl'
|
needle_file_name = 'needles.jsonl'
|
||||||
depths_float = generate_depth_percents(
|
depths_float = generate_depth_percents(
|
||||||
document_depth_percent_intervals,
|
document_depth_percent_intervals,
|
||||||
@ -84,10 +84,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_en.append(dataset_dict)
|
needlebench_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
file_list = ['zh_finance.jsonl']
|
file_list = ['zh_finance.jsonl']
|
||||||
needlebench_datasets_zh = []
|
needlebench_zh_datasets = []
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
dataset_dict = {
|
dataset_dict = {
|
||||||
@ -108,4 +108,4 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_zh.append(dataset_dict)
|
needlebench_zh_datasets.append(dataset_dict)
|
@ -58,7 +58,7 @@ document_depth_percent_interval_type = "linear"
|
|||||||
|
|
||||||
base_path = './data/needlebench'
|
base_path = './data/needlebench'
|
||||||
file_list = ['PaulGrahamEssays.jsonl']
|
file_list = ['PaulGrahamEssays.jsonl']
|
||||||
needlebench_datasets_en = []
|
needlebench_en_datasets = []
|
||||||
needle_file_name = 'needles.jsonl'
|
needle_file_name = 'needles.jsonl'
|
||||||
|
|
||||||
for document_depth_percent_intervals in document_depth_percent_intervals_list:
|
for document_depth_percent_intervals in document_depth_percent_intervals_list:
|
||||||
@ -86,10 +86,10 @@ for document_depth_percent_intervals in document_depth_percent_intervals_list:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_en.append(dataset_dict)
|
needlebench_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
file_list = ['zh_finance.jsonl']
|
file_list = ['zh_finance.jsonl']
|
||||||
needlebench_datasets_zh = []
|
needlebench_zh_datasets = []
|
||||||
needle_file_name = 'needles.jsonl'
|
needle_file_name = 'needles.jsonl'
|
||||||
|
|
||||||
for document_depth_percent_intervals in document_depth_percent_intervals_list:
|
for document_depth_percent_intervals in document_depth_percent_intervals_list:
|
||||||
@ -117,4 +117,4 @@ for document_depth_percent_intervals in document_depth_percent_intervals_list:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_zh.append(dataset_dict)
|
needlebench_zh_datasets.append(dataset_dict)
|
@ -58,7 +58,7 @@ document_depth_percent_interval_type = "linear"
|
|||||||
|
|
||||||
base_path = './data/needlebench'
|
base_path = './data/needlebench'
|
||||||
file_list = ['PaulGrahamEssays.jsonl']
|
file_list = ['PaulGrahamEssays.jsonl']
|
||||||
needlebench_datasets_en = []
|
needlebench_en_datasets = []
|
||||||
needle_file_name = 'needles.jsonl'
|
needle_file_name = 'needles.jsonl'
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
@ -83,10 +83,10 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_en.append(dataset_dict)
|
needlebench_en_datasets.append(dataset_dict)
|
||||||
|
|
||||||
file_list = ['zh_finance.jsonl']
|
file_list = ['zh_finance.jsonl']
|
||||||
needlebench_datasets_zh = []
|
needlebench_zh_datasets = []
|
||||||
needle_file_name = 'needles.jsonl'
|
needle_file_name = 'needles.jsonl'
|
||||||
|
|
||||||
for original_context_length in context_lengths:
|
for original_context_length in context_lengths:
|
||||||
@ -111,4 +111,4 @@ for original_context_length in context_lengths:
|
|||||||
'infer_cfg': needlebench_infer_cfg,
|
'infer_cfg': needlebench_infer_cfg,
|
||||||
'eval_cfg': needlebench_eval_cfg
|
'eval_cfg': needlebench_eval_cfg
|
||||||
}
|
}
|
||||||
needlebench_datasets_zh.append(dataset_dict)
|
needlebench_zh_datasets.append(dataset_dict)
|
@ -70,12 +70,20 @@ def get_config_from_arg(args) -> Config:
|
|||||||
datasets = []
|
datasets = []
|
||||||
if args.datasets:
|
if args.datasets:
|
||||||
datasets_dir = os.path.join(args.config_dir, 'datasets')
|
datasets_dir = os.path.join(args.config_dir, 'datasets')
|
||||||
for dataset in match_cfg_file(datasets_dir, args.datasets):
|
for dataset_arg in args.datasets:
|
||||||
get_logger().info(f'Loading {dataset[0]}: {dataset[1]}')
|
if '/' in dataset_arg:
|
||||||
cfg = Config.fromfile(dataset[1])
|
dataset_name, dataset_suffix = dataset_arg.split('/', 1)
|
||||||
for k in cfg.keys():
|
dataset_key_suffix = dataset_suffix
|
||||||
if k.endswith('_datasets'):
|
else:
|
||||||
datasets += cfg[k]
|
dataset_name = dataset_arg
|
||||||
|
dataset_key_suffix = '_datasets'
|
||||||
|
|
||||||
|
for dataset in match_cfg_file(datasets_dir, [dataset_name]):
|
||||||
|
get_logger().info(f'Loading {dataset[0]}: {dataset[1]}')
|
||||||
|
cfg = Config.fromfile(dataset[1])
|
||||||
|
for k in cfg.keys():
|
||||||
|
if k.endswith(dataset_key_suffix):
|
||||||
|
datasets += cfg[k]
|
||||||
else:
|
else:
|
||||||
dataset = {'path': args.custom_dataset_path}
|
dataset = {'path': args.custom_dataset_path}
|
||||||
if args.custom_dataset_infer_method is not None:
|
if args.custom_dataset_infer_method is not None:
|
||||||
@ -119,12 +127,26 @@ def get_config_from_arg(args) -> Config:
|
|||||||
run_cfg=dict(num_gpus=args.num_gpus))
|
run_cfg=dict(num_gpus=args.num_gpus))
|
||||||
models.append(model)
|
models.append(model)
|
||||||
# parse summarizer args
|
# parse summarizer args
|
||||||
summarizer = args.summarizer if args.summarizer is not None else 'example'
|
summarizer_arg = args.summarizer if args.summarizer is not None \
|
||||||
|
else 'example'
|
||||||
summarizers_dir = os.path.join(args.config_dir, 'summarizers')
|
summarizers_dir = os.path.join(args.config_dir, 'summarizers')
|
||||||
s = match_cfg_file(summarizers_dir, [summarizer])[0]
|
|
||||||
|
# Check if summarizer_arg contains '/'
|
||||||
|
if '/' in summarizer_arg:
|
||||||
|
# If it contains '/', split the string by '/'
|
||||||
|
# and use the second part as the configuration key
|
||||||
|
summarizer_file, summarizer_key = summarizer_arg.split('/', 1)
|
||||||
|
else:
|
||||||
|
# If it does not contain '/', keep the original logic unchanged
|
||||||
|
summarizer_key = 'summarizer'
|
||||||
|
summarizer_file = summarizer_arg
|
||||||
|
|
||||||
|
s = match_cfg_file(summarizers_dir, [summarizer_file])[0]
|
||||||
get_logger().info(f'Loading {s[0]}: {s[1]}')
|
get_logger().info(f'Loading {s[0]}: {s[1]}')
|
||||||
cfg = Config.fromfile(s[1])
|
cfg = Config.fromfile(s[1])
|
||||||
summarizer = cfg['summarizer']
|
# Use summarizer_key to retrieve the summarizer definition
|
||||||
|
# from the configuration file
|
||||||
|
summarizer = cfg[summarizer_key]
|
||||||
|
|
||||||
return Config(dict(models=models, datasets=datasets,
|
return Config(dict(models=models, datasets=datasets,
|
||||||
summarizer=summarizer),
|
summarizer=summarizer),
|
||||||
|
Loading…
Reference in New Issue
Block a user