mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
add Needlebench-1000K configs
This commit is contained in:
parent
3098d78845
commit
367ba1ba61
@ -6,5 +6,6 @@ with read_base():
|
||||
from .needlebench_32k.needlebench import needlebench_datasets as needlebench_datasets_32k
|
||||
from .needlebench_128k.needlebench import needlebench_datasets as needlebench_datasets_128k
|
||||
from .needlebench_200k.needlebench import needlebench_datasets as needlebench_datasets_200k
|
||||
from .needlebench_1000k.needlebench import needlebench_datasets as needlebench_datasets_1000k
|
||||
|
||||
needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
||||
|
@ -0,0 +1,18 @@
|
||||
from mmengine.config import read_base

# Aggregate config for the NeedleBench-1000K suite: pulls in every dataset
# variant defined in this directory and flattens them into one list.
with read_base():
    # Multi-needle reasoning variants (2-5 needles, English and Chinese).
    from .needlebench_multi_reasoning import needlebench_datasets_2needle_en as needlebench_multi_2needle_en_datasets
    from .needlebench_multi_reasoning import needlebench_datasets_3needle_en as needlebench_multi_3needle_en_datasets
    from .needlebench_multi_reasoning import needlebench_datasets_4needle_en as needlebench_multi_4needle_en_datasets
    from .needlebench_multi_reasoning import needlebench_datasets_5needle_en as needlebench_multi_5needle_en_datasets
    from .needlebench_multi_reasoning import needlebench_datasets_2needle_zh as needlebench_multi_2needle_zh_datasets
    from .needlebench_multi_reasoning import needlebench_datasets_3needle_zh as needlebench_multi_3needle_zh_datasets
    from .needlebench_multi_reasoning import needlebench_datasets_4needle_zh as needlebench_multi_4needle_zh_datasets
    from .needlebench_multi_reasoning import needlebench_datasets_5needle_zh as needlebench_multi_5needle_zh_datasets

    # Single-needle ("origin") and parallel multi-retrieval variants.
    from .needlebench_single import needlebench_datasets_en as needlebench_origin_en_datasets
    from .needlebench_single import needlebench_datasets_zh as needlebench_origin_zh_datasets
    from .needlebench_multi_retrieval import needlebench_datasets_en as needlebench_parallel_en_datasets
    from .needlebench_multi_retrieval import needlebench_datasets_zh as needlebench_parallel_zh_datasets

# Flatten every imported list whose name ends with '_datasets' into the
# single aggregate list exposed by this config file.
needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
@ -0,0 +1,286 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets.needlebench.multi import NeedleBenchMultiDataset
|
||||
from opencompass.datasets.needlebench.multi import NeedleBenchMultiEvaluator
|
||||
from opencompass.datasets.needlebench.origin import needlebench_postprocess
|
||||
from opencompass.datasets.needlebench.origin import needlebench_dataset_postprocess
|
||||
import math
|
||||
|
||||
|
||||
def logistic(x, L=100, x0=50, k=0.1):
    """Evaluate a logistic (sigmoid) curve at *x*, rounded to 3 decimals.

    *L* is the curve maximum, *x0* the midpoint and *k* the steepness.
    """
    exponent = -k * (x - x0)
    value = L / (1 + math.exp(exponent))
    return round(value, 3)
|
||||
|
||||
|
||||
def generate_linear_space(start, end, num):
    """Return *num* evenly spaced values from *start* to *end* inclusive."""
    if num < 1:
        raise ValueError("num must be at least 1.")
    if num == 1:
        return [start]
    step = (end - start) / (num - 1)
    points = []
    for i in range(num):
        points.append(start + step * i)
    return points
|
||||
|
||||
|
||||
def generate_depth_percents(intervals, interval_type):
    """Build needle depth percentages in [0, 100] for the given spacing.

    'linear' gives evenly spaced depths; 'sigmoid' warps them through the
    logistic curve so depths cluster near the middle of the context.
    """
    if interval_type not in ('linear', 'sigmoid'):
        raise ValueError('Unsupported interval type')
    points = generate_linear_space(0, 100, intervals)
    if interval_type == 'sigmoid':
        points = [logistic(p) for p in points]
    return points
|
||||
|
||||
|
||||
# Reader / inference / evaluation configs shared by every multi-needle
# reasoning dataset defined in this file.
needlebench_reader_cfg = dict(input_columns=['prompt'], output_column='answer')

needlebench_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='HUMAN', prompt='{prompt}'),
                dict(role='BOT', prompt='{answer}\n'),
            ]
        )
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer))

needlebench_eval_cfg = dict(
    evaluator=dict(type=NeedleBenchMultiEvaluator),
    pred_postprocessor=dict(type=needlebench_postprocess),
    dataset_postprocessor=dict(type=needlebench_dataset_postprocess),
    pred_role='BOT')

# Context lengths (tokens) and needle depths (percent of context) swept by
# the 1000k benchmark.
context_lengths = [20000, 160000, 300000, 440000, 580000, 720000, 860000, 1000000]
depths_list = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100]

base_path = './data/needlebench'
diff = 10  # difficulty value forwarded unchanged to every dataset


# NOTE: the helper name deliberately does not end in '_datasets' so the
# aggregator's locals() filter (endswith('_datasets')) never picks it up.
def _build_needle_configs(num_needles, file_list, needle_file_name, language,
                          length_buffer, lang_abbr):
    """Return one dataset config dict per (context length, depth) pair.

    Args:
        num_needles: Number of needles inserted per sample.
        file_list: Haystack corpus files.
        needle_file_name: JSON file holding the reasoning needles.
        language: 'English' or 'Chinese'.
        length_buffer: Token budget reserved on top of the context length.
        lang_abbr: Short language tag used in the dataset abbreviation.
    """
    configs = []
    for original_context_length in context_lengths:
        for depth_percent in depths_list:
            configs.append({
                'abbr': f'Length{original_context_length}'
                f'Depth{int(depth_percent)}_{num_needles}needle_{lang_abbr}_1000k',
                'type': NeedleBenchMultiDataset,
                'path': base_path,
                'length': original_context_length,
                'depth': int(depth_percent),
                'tokenizer_model': 'gpt-4',
                'file_list': file_list,
                'num_repeats_per_file': 10,
                'length_buffer': length_buffer,
                'guide': True,
                'language': language,
                'needle_file_name': needle_file_name,
                'num_needles': num_needles,
                'diff': diff,
                'reader_cfg': needlebench_reader_cfg,
                'infer_cfg': needlebench_infer_cfg,
                'eval_cfg': needlebench_eval_cfg,
            })
    return configs


# ----------English Version----------
_en_files = ['PaulGrahamEssays.jsonl']
_en_needles = 'multi_needle_reasoning_en.json'
needlebench_datasets_2needle_en = _build_needle_configs(2, _en_files, _en_needles, 'English', 600, 'en')
needlebench_datasets_3needle_en = _build_needle_configs(3, _en_files, _en_needles, 'English', 600, 'en')
needlebench_datasets_4needle_en = _build_needle_configs(4, _en_files, _en_needles, 'English', 600, 'en')
needlebench_datasets_5needle_en = _build_needle_configs(5, _en_files, _en_needles, 'English', 600, 'en')

# ----------Chinese Version----------
_zh_files = ['zh_finance.jsonl']
_zh_needles = 'multi_needle_reasoning_zh.json'
needlebench_datasets_2needle_zh = _build_needle_configs(2, _zh_files, _zh_needles, 'Chinese', 200, 'zh')
needlebench_datasets_3needle_zh = _build_needle_configs(3, _zh_files, _zh_needles, 'Chinese', 200, 'zh')
needlebench_datasets_4needle_zh = _build_needle_configs(4, _zh_files, _zh_needles, 'Chinese', 200, 'zh')
needlebench_datasets_5needle_zh = _build_needle_configs(5, _zh_files, _zh_needles, 'Chinese', 200, 'zh')
|
@ -0,0 +1,108 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets.needlebench.parallel import NeedleBenchParallelDataset
|
||||
from opencompass.datasets.needlebench.parallel import NeedleBenchParallelEvaluator
|
||||
from opencompass.datasets.needlebench.origin import needlebench_postprocess
|
||||
from opencompass.datasets.needlebench.origin import needlebench_dataset_postprocess
|
||||
import math
|
||||
|
||||
|
||||
def logistic(x, L=100, x0=50, k=0.1):
    """Logistic (sigmoid) curve value at *x*, rounded to three decimals.

    *L* sets the upper asymptote, *x0* the midpoint, *k* the slope.
    """
    denominator = 1 + math.exp(-k * (x - x0))
    return round(L / denominator, 3)
|
||||
|
||||
|
||||
def generate_linear_space(start, end, num):
    """Return *num* evenly spaced values covering [start, end]."""
    if num < 1:
        raise ValueError("num must be at least 1.")
    if num == 1:
        return [start]
    spacing = (end - start) / (num - 1)
    return [start + spacing * idx for idx in range(num)]
|
||||
|
||||
|
||||
def generate_depth_percents(intervals, interval_type):
    """Compute needle depth percentages for the requested spacing scheme.

    'linear' yields evenly spaced depths over [0, 100]; 'sigmoid' maps the
    linear grid through the logistic curve. Any other value raises.
    """
    base = generate_linear_space(0, 100, intervals)
    if interval_type == 'linear':
        return base
    if interval_type == 'sigmoid':
        return [logistic(value) for value in base]
    raise ValueError('Unsupported interval type')
|
||||
|
||||
|
||||
# Reader / inference / evaluation configs shared by both parallel
# (multi-retrieval) datasets defined in this file.
needlebench_reader_cfg = dict(input_columns=['prompt'], output_column='answer')

needlebench_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='HUMAN', prompt='{prompt}'),
                dict(role='BOT', prompt='{answer}\n'),
            ]
        )
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer))

needlebench_eval_cfg = dict(
    evaluator=dict(type=NeedleBenchParallelEvaluator),
    pred_postprocessor=dict(type=needlebench_postprocess),
    dataset_postprocessor=dict(type=needlebench_dataset_postprocess),
    pred_role='BOT')

# Context lengths (tokens) of the 1000k sweep.
# Fixed: dropped the redundant list() wrapper around the list literal.
context_lengths = [20000, 160000, 300000, 440000, 580000, 720000, 860000, 1000000]
document_depth_percent_intervals = 20
document_depth_percent_interval_type = "linear"

base_path = './data/needlebench'
needle_file_name = 'needles.jsonl'
# All needles are planted at these depths within a single sample.
depths = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100]


# NOTE: the helper name deliberately does not end in '_datasets' so the
# aggregator's locals() filter (endswith('_datasets')) never picks it up.
def _build_parallel_configs(file_list, language, length_buffer, lang_abbr):
    """Return one parallel-retrieval dataset config per context length.

    Args:
        file_list: Haystack corpus files.
        language: 'English' or 'Chinese'.
        length_buffer: Token budget reserved on top of the context length.
        lang_abbr: Short language tag used in the dataset abbreviation.
    """
    configs = []
    for original_context_length in context_lengths:
        configs.append({
            'abbr': f'Length{original_context_length}_parallel_{lang_abbr}_1000k',
            'type': NeedleBenchParallelDataset,
            'path': base_path,
            'needle_file_name': needle_file_name,
            'length': original_context_length,
            'depths': depths,
            'tokenizer_model': 'gpt-4',
            'file_list': file_list,
            'num_repeats_per_file': 25,
            'length_buffer': length_buffer,
            'guide': True,
            'language': language,
            'reader_cfg': needlebench_reader_cfg,
            'infer_cfg': needlebench_infer_cfg,
            'eval_cfg': needlebench_eval_cfg,
        })
    return configs


needlebench_datasets_en = _build_parallel_configs(['PaulGrahamEssays.jsonl'], 'English', 3000, 'en')
needlebench_datasets_zh = _build_parallel_configs(['zh_finance.jsonl'], 'Chinese', 200, 'zh')
|
@ -0,0 +1,109 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets.needlebench.origin import NeedleBenchOriginDataset
|
||||
from opencompass.datasets.needlebench.origin import NeedleBenchOriginEvaluator
|
||||
from opencompass.datasets.needlebench.origin import needlebench_postprocess
|
||||
from opencompass.datasets.needlebench.origin import needlebench_dataset_postprocess
|
||||
import math
|
||||
|
||||
|
||||
def logistic(x, L=100, x0=50, k=0.1):
    """Return the logistic function L / (1 + e^(-k(x - x0))), 3-decimal rounded."""
    scaled = math.exp(-k * (x - x0))
    return round(L / (1 + scaled), 3)
|
||||
|
||||
|
||||
def generate_linear_space(start, end, num):
    """Produce *num* equally spaced samples from *start* through *end*."""
    if num == 1:
        return [start]
    if num < 1:
        raise ValueError("num must be at least 1.")
    increment = (end - start) / (num - 1)
    samples = [start + increment * position for position in range(num)]
    return samples
|
||||
|
||||
|
||||
def generate_depth_percents(intervals, interval_type):
    """Generate depth percentages over [0, 100] using the chosen spacing.

    Supported schemes: 'linear' (uniform) and 'sigmoid' (logistic-warped).
    Raises ValueError on anything else.
    """
    if interval_type == 'sigmoid':
        grid = generate_linear_space(0, 100, intervals)
        return [logistic(point) for point in grid]
    if interval_type == 'linear':
        return generate_linear_space(0, 100, intervals)
    raise ValueError('Unsupported interval type')
|
||||
|
||||
|
||||
# Reader / inference / evaluation configs shared by both single-needle
# ("origin") datasets defined in this file.
needlebench_reader_cfg = dict(input_columns=['prompt'], output_column='answer')

needlebench_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='HUMAN', prompt='{prompt}'),
                dict(role='BOT', prompt='{answer}\n'),
            ]
        )
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer))

needlebench_eval_cfg = dict(
    evaluator=dict(type=NeedleBenchOriginEvaluator),
    pred_postprocessor=dict(type=needlebench_postprocess),
    dataset_postprocessor=dict(type=needlebench_dataset_postprocess),
    pred_role='BOT')

# Context lengths (tokens) and needle depths (percent of context) swept by
# the 1000k benchmark.
context_lengths = [20000, 160000, 300000, 440000, 580000, 720000, 860000, 1000000]
depths_list = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100]

base_path = './data/needlebench'
needle_file_name = 'needles.jsonl'


# NOTE: the helper name deliberately does not end in '_datasets' so the
# aggregator's locals() filter (endswith('_datasets')) never picks it up.
def _build_origin_configs(file_list, language, length_buffer, lang_abbr):
    """Return one single-needle dataset config per (length, depth) pair.

    Args:
        file_list: Haystack corpus files.
        language: 'English' or 'Chinese'.
        length_buffer: Token budget reserved on top of the context length.
        lang_abbr: Short language tag used in the dataset abbreviation.
    """
    configs = []
    for original_context_length in context_lengths:
        for depth_percent in depths_list:
            configs.append({
                'abbr': f'Length{original_context_length}'
                f'Depth{int(depth_percent)}_origin_{lang_abbr}_1000k',
                'type': NeedleBenchOriginDataset,
                'path': base_path,
                'length': original_context_length,
                'depth': int(depth_percent),
                'tokenizer_model': 'gpt-4',
                'file_list': file_list,
                'num_repeats_per_file': 10,
                'length_buffer': length_buffer,
                'guide': True,
                'language': language,
                'needle_file_name': needle_file_name,
                'reader_cfg': needlebench_reader_cfg,
                'infer_cfg': needlebench_infer_cfg,
                'eval_cfg': needlebench_eval_cfg,
            })
    return configs


needlebench_datasets_en = _build_origin_configs(['PaulGrahamEssays.jsonl'], 'English', 600, 'en')
needlebench_datasets_zh = _build_origin_configs(['zh_finance.jsonl'], 'Chinese', 200, 'zh')
|
@ -16,6 +16,7 @@ configs/datasets/needlebench/
|
||||
├── needlebench_32k
|
||||
├── needlebench_128k
|
||||
├── needlebench_200k
|
||||
├── needlebench_1000k
|
||||
├── needlebench.py
|
||||
├── readme.md
|
||||
└── readme_zh-CN.md
|
||||
|
@ -16,6 +16,7 @@ configs/datasets/needlebench/
|
||||
├── needlebench_32k
|
||||
├── needlebench_128k
|
||||
├── needlebench_200k
|
||||
├── needlebench_1000k
|
||||
├── needlebench.py
|
||||
├── readme.md
|
||||
└── readme_zh-CN.md
|
||||
|
@ -539,8 +539,114 @@ needlebench_200k_summarizer = dict(
|
||||
],
|
||||
summary_groups=needlebench_summary_groups,
|
||||
)
|
||||
context_lengths_8k = list(range(5000, 9000, 1000))
|
||||
|
||||
# ----------NeedleBench-1000k-summarizer----------

# Fixed: dropped the redundant list() wrapper around the list literal.
context_lengths_1000k = [20000, 160000, 300000, 440000, 580000, 720000, 860000, 1000000]


def _abbrs_1000k(tag):
    """Dataset abbreviations for *tag* over every (length, depth) pair.

    Must mirror the 'abbr' fields generated by the needlebench_1000k
    dataset configs (Length{L}Depth{D}_{tag}_1000k).
    """
    return [
        f'Length{length}Depth{int(depth)}_{tag}_1000k'
        for length in context_lengths_1000k
        for depth in depths_list_sparse
    ]


# Per-variant abbreviation lists used to form the summary groups below.
_needlebench_1000k_2needle_en = _abbrs_1000k('2needle_en')
_needlebench_1000k_3needle_en = _abbrs_1000k('3needle_en')
_needlebench_1000k_4needle_en = _abbrs_1000k('4needle_en')
_needlebench_1000k_5needle_en = _abbrs_1000k('5needle_en')
_needlebench_1000k_2needle_zh = _abbrs_1000k('2needle_zh')
_needlebench_1000k_3needle_zh = _abbrs_1000k('3needle_zh')
_needlebench_1000k_4needle_zh = _abbrs_1000k('4needle_zh')
_needlebench_1000k_5needle_zh = _abbrs_1000k('5needle_zh')
_needlebench_1000k_origin_en = _abbrs_1000k('origin_en')
_needlebench_1000k_origin_zh = _abbrs_1000k('origin_zh')

# Concatenate the multi-needle and origin lists.
_needlebench_1000k_multi_needle_en = (
    _needlebench_1000k_2needle_en + _needlebench_1000k_3needle_en
    + _needlebench_1000k_4needle_en + _needlebench_1000k_5needle_en)
_needlebench_1000k_multi_needle_zh = (
    _needlebench_1000k_2needle_zh + _needlebench_1000k_3needle_zh
    + _needlebench_1000k_4needle_zh + _needlebench_1000k_5needle_zh)
_needlebench_1000k_origin = _needlebench_1000k_origin_en + _needlebench_1000k_origin_zh
_needlebench_1000k_multi_needle = _needlebench_1000k_multi_needle_en + _needlebench_1000k_multi_needle_zh

# Parallel (multi-retrieval) datasets have one entry per context length only.
_needlebench_1000k_parallel_en = [
    f'Length{length}_parallel_en_1000k' for length in context_lengths_1000k]
_needlebench_1000k_parallel_zh = [
    f'Length{length}_parallel_zh_1000k' for length in context_lengths_1000k]
_needlebench_1000k_parallel = _needlebench_1000k_parallel_en + _needlebench_1000k_parallel_zh

# NOTE: intentionally rebinds needlebench_summary_groups for the 1000k
# summarizer, mirroring how the other length-specific summarizers are built.
needlebench_summary_groups = [
    {'name': 'original_version', 'subsets': _needlebench_1000k_origin},
    {'name': 'original_version_zh', 'subsets': _needlebench_1000k_origin_zh},
    {'name': 'original_version_en', 'subsets': _needlebench_1000k_origin_en},

    {'name': 'multi_needle_en', 'subsets': _needlebench_1000k_multi_needle_en},
    {'name': 'multi_needle2_en', 'subsets': _needlebench_1000k_2needle_en},
    {'name': 'multi_needle3_en', 'subsets': _needlebench_1000k_3needle_en},
    {'name': 'multi_needle4_en', 'subsets': _needlebench_1000k_4needle_en},
    {'name': 'multi_needle5_en', 'subsets': _needlebench_1000k_5needle_en},

    {'name': 'multi_needle_zh', 'subsets': _needlebench_1000k_multi_needle_zh},
    {'name': 'multi_needle2_zh', 'subsets': _needlebench_1000k_2needle_zh},
    {'name': 'multi_needle3_zh', 'subsets': _needlebench_1000k_3needle_zh},
    {'name': 'multi_needle4_zh', 'subsets': _needlebench_1000k_4needle_zh},
    {'name': 'multi_needle5_zh', 'subsets': _needlebench_1000k_5needle_zh},

    {'name': 'multi_needle', 'subsets': _needlebench_1000k_multi_needle},

    {'name': 'parallel_version', 'subsets': _needlebench_1000k_parallel},
    {'name': 'parallel_version_zh', 'subsets': _needlebench_1000k_parallel_zh},
    {'name': 'parallel_version_en', 'subsets': _needlebench_1000k_parallel_en},

    # Weighted overall score across the three benchmark families.
    {'name': 'overall',
     'subsets': [['original_version', 'naive_average'],
                 ['multi_needle', 'naive_average'],
                 ['parallel_version', 'average_score']],
     'weights': {'original_version': 0.4,
                 'multi_needle': 0.3,
                 'parallel_version': 0.3}},
]
needlebench_1000k_summarizer = dict(
    type=NeedleBenchSummarizer,
    dataset_abbrs=[
        'overall',
        '--------- NeedleBench-1000k Single-Needle ---------',  # category
        'original_version',
        'original_version_zh',
        'original_version_en',
        '--------- NeedleBench-1000k Parallel-Needles ---------',  # category
        'parallel_version',
        'parallel_version_zh',
        'parallel_version_en',
        '--------- NeedleBench-1000k Multi-Needles ---------',  # category
        'multi_needle',
        'multi_needle_en',
        'multi_needle_zh',
        'multi_needle2_en',
        'multi_needle3_en',
        'multi_needle4_en',
        'multi_needle5_en',
        'multi_needle2_zh',
        'multi_needle3_zh',
        'multi_needle4_zh',
        'multi_needle5_zh',
    ],
    summary_groups=needlebench_summary_groups,
)
|
||||
|
||||
context_lengths_8k = list(range(5000, 9000, 1000))
|
||||
# Repeating the same process for parallel (assuming it's similar to origin_en)
|
||||
_needlebench_8k_parallel_en_batch1 = []
|
||||
_needlebench_8k_parallel_en_batch5 = []
|
||||
|
Loading…
Reference in New Issue
Block a user