mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Feature] update needlebench and configs (#986)
* add Needlebench-1000K configs * add prompt position args * add model configs * Update parallel.py * fix lint
This commit is contained in:
parent
0665bb91a8
commit
0a6a03fe1a
@ -6,5 +6,6 @@ with read_base():
|
|||||||
from .needlebench_32k.needlebench import needlebench_datasets as needlebench_datasets_32k
|
from .needlebench_32k.needlebench import needlebench_datasets as needlebench_datasets_32k
|
||||||
from .needlebench_128k.needlebench import needlebench_datasets as needlebench_datasets_128k
|
from .needlebench_128k.needlebench import needlebench_datasets as needlebench_datasets_128k
|
||||||
from .needlebench_200k.needlebench import needlebench_datasets as needlebench_datasets_200k
|
from .needlebench_200k.needlebench import needlebench_datasets as needlebench_datasets_200k
|
||||||
|
from .needlebench_1000k.needlebench import needlebench_datasets as needlebench_datasets_1000k
|
||||||
|
|
||||||
needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
||||||
|
@ -0,0 +1,18 @@
|
|||||||
|
from mmengine.config import read_base
|
||||||
|
|
||||||
|
with read_base():
|
||||||
|
from .needlebench_multi_reasoning import needlebench_datasets_2needle_en as needlebench_multi_2needle_en_datasets
|
||||||
|
from .needlebench_multi_reasoning import needlebench_datasets_3needle_en as needlebench_multi_3needle_en_datasets
|
||||||
|
from .needlebench_multi_reasoning import needlebench_datasets_4needle_en as needlebench_multi_4needle_en_datasets
|
||||||
|
from .needlebench_multi_reasoning import needlebench_datasets_5needle_en as needlebench_multi_5needle_en_datasets
|
||||||
|
from .needlebench_multi_reasoning import needlebench_datasets_2needle_zh as needlebench_multi_2needle_zh_datasets
|
||||||
|
from .needlebench_multi_reasoning import needlebench_datasets_3needle_zh as needlebench_multi_3needle_zh_datasets
|
||||||
|
from .needlebench_multi_reasoning import needlebench_datasets_4needle_zh as needlebench_multi_4needle_zh_datasets
|
||||||
|
from .needlebench_multi_reasoning import needlebench_datasets_5needle_zh as needlebench_multi_5needle_zh_datasets
|
||||||
|
|
||||||
|
from .needlebench_single import needlebench_datasets_en as needlebench_origin_en_datasets
|
||||||
|
from .needlebench_single import needlebench_datasets_zh as needlebench_origin_zh_datasets
|
||||||
|
from .needlebench_multi_retrieval import needlebench_datasets_en as needlebench_parallel_en_datasets
|
||||||
|
from .needlebench_multi_retrieval import needlebench_datasets_zh as needlebench_parallel_zh_datasets
|
||||||
|
|
||||||
|
needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
@ -0,0 +1,286 @@
|
|||||||
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
|
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||||
|
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||||
|
from opencompass.datasets.needlebench.multi import NeedleBenchMultiDataset
|
||||||
|
from opencompass.datasets.needlebench.multi import NeedleBenchMultiEvaluator
|
||||||
|
from opencompass.datasets.needlebench.origin import needlebench_postprocess
|
||||||
|
from opencompass.datasets.needlebench.origin import needlebench_dataset_postprocess
|
||||||
|
import math
|
||||||
|
|
||||||
|
|
||||||
|
def logistic(x, L=100, x0=50, k=0.1):
    """Evaluate a logistic (sigmoid) curve at ``x``, rounded to 3 decimals.

    Args:
        x: Point at which the curve is evaluated.
        L: Upper asymptote of the curve.
        x0: Midpoint; the curve equals ``L / 2`` at ``x == x0``.
        k: Steepness of the curve.

    Returns:
        ``L / (1 + exp(-k * (x - x0)))`` rounded to three decimal places.
    """
    try:
        return round(L / (1 + math.exp(-k * (x - x0))), 3)
    except OverflowError:
        # math.exp() overflows once its argument exceeds roughly 709. The
        # denominator is then effectively infinite, so the curve's value
        # is its lower limit, 0.
        return 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def generate_linear_space(start, end, num):
    """Return ``num`` evenly spaced values from ``start`` to ``end`` inclusive.

    Args:
        start: First value of the sequence.
        end: Last value of the sequence.
        num: Number of values to generate; must be at least 1.

    Returns:
        A list with ``num`` values. When ``num`` is 1 the list is
        ``[start]``.

    Raises:
        ValueError: If ``num`` is smaller than 1.
    """
    if num < 1:
        raise ValueError("num must be at least 1.")
    if num == 1:
        return [start]
    step = (end - start) / (num - 1)
    points = [start + step * i for i in range(num - 1)]
    # Pin the final point to ``end`` instead of computing it: float rounding
    # in ``start + step * (num - 1)`` can land a hair off ``end``, and an
    # int() truncation of such a value would yield a different number.
    # Adding ``0 * step`` keeps the numeric type of the other points.
    points.append(end + 0 * step)
    return points
|
||||||
|
|
||||||
|
|
||||||
|
def generate_depth_percents(intervals, interval_type):
    """Build a list of depth percentages between 0 and 100.

    Args:
        intervals: Number of points to generate.
        interval_type: ``'linear'`` for evenly spaced points, or
            ``'sigmoid'`` to map those points through ``logistic``.

    Returns:
        A list of ``intervals`` depth values.

    Raises:
        ValueError: If ``interval_type`` is neither ``'linear'`` nor
            ``'sigmoid'``.
    """
    # Reject unknown types before doing any work.
    if interval_type not in ('linear', 'sigmoid'):
        raise ValueError('Unsupported interval type')

    depth_points = generate_linear_space(0, 100, intervals)
    if interval_type == 'sigmoid':
        depth_points = [logistic(point) for point in depth_points]
    return depth_points
|
||||||
|
|
||||||
|
|
||||||
|
# Reader config: 'prompt' is the input column, 'answer' the output column.
needlebench_reader_cfg = {'input_columns': ['prompt'], 'output_column': 'answer'}

# Inference config shared by every dataset entry built below.
needlebench_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {'role': 'HUMAN', 'prompt': '{prompt}'},
                {'role': 'BOT', 'prompt': '{answer}\n'},
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer},
}

# Evaluation config shared by every dataset entry built below.
needlebench_eval_cfg = {
    'evaluator': {'type': NeedleBenchMultiEvaluator},
    'pred_postprocessor': {'type': needlebench_postprocess},
    'dataset_postprocessor': {'type': needlebench_dataset_postprocess},
    'pred_role': 'BOT',
}

context_lengths = [20000, 160000, 300000, 440000, 580000, 720000, 860000, 1000000]
depths_list = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100]

# ----------English Version----------
base_path = './data/needlebench'
file_list = ['PaulGrahamEssays.jsonl']

needle_file_name = 'multi_needle_reasoning_en.json'
diff = 10
language = 'English'

needlebench_datasets_2needle_en = []
needlebench_datasets_3needle_en = []
needlebench_datasets_4needle_en = []
needlebench_datasets_5needle_en = []

# One entry per (needle count, context length, depth) combination. The
# entries for the four needle counts differ only in `num_needles`, so a
# single loop nest fills all four lists.
for num_needles, _bucket in (
        (2, needlebench_datasets_2needle_en),
        (3, needlebench_datasets_3needle_en),
        (4, needlebench_datasets_4needle_en),
        (5, needlebench_datasets_5needle_en),
):
    for original_context_length in context_lengths:
        for depth_percent in depths_list:
            dataset_dict = {
                'abbr': f'Length{original_context_length}'
                f'Depth{int(depth_percent)}_{num_needles}needle_en_1000k',
                'type': NeedleBenchMultiDataset,
                'path': base_path,
                'length': original_context_length,
                'depth': int(depth_percent),
                'tokenizer_model': 'gpt-4',
                'file_list': file_list,
                'num_repeats_per_file': 10,
                'length_buffer': 600,
                'guide': True,
                'language': language,
                'needle_file_name': needle_file_name,
                'num_needles': num_needles,
                'diff': diff,
                'reader_cfg': needlebench_reader_cfg,
                'infer_cfg': needlebench_infer_cfg,
                'eval_cfg': needlebench_eval_cfg
            }
            _bucket.append(dataset_dict)

# ----------Chinese Version----------
base_path = './data/needlebench'
file_list = ['zh_finance.jsonl']

needle_file_name = 'multi_needle_reasoning_zh.json'
diff = 10
language = 'Chinese'

needlebench_datasets_2needle_zh = []
needlebench_datasets_3needle_zh = []
needlebench_datasets_4needle_zh = []
needlebench_datasets_5needle_zh = []

# Same construction as the English section; only the corpus, needle file,
# abbreviation suffix and length buffer differ.
for num_needles, _bucket in (
        (2, needlebench_datasets_2needle_zh),
        (3, needlebench_datasets_3needle_zh),
        (4, needlebench_datasets_4needle_zh),
        (5, needlebench_datasets_5needle_zh),
):
    for original_context_length in context_lengths:
        for depth_percent in depths_list:
            dataset_dict = {
                'abbr': f'Length{original_context_length}'
                f'Depth{int(depth_percent)}_{num_needles}needle_zh_1000k',
                'type': NeedleBenchMultiDataset,
                'path': base_path,
                'length': original_context_length,
                'depth': int(depth_percent),
                'tokenizer_model': 'gpt-4',
                'file_list': file_list,
                'num_repeats_per_file': 10,
                'length_buffer': 200,
                'guide': True,
                'language': language,
                'needle_file_name': needle_file_name,
                'num_needles': num_needles,
                'diff': diff,
                'reader_cfg': needlebench_reader_cfg,
                'infer_cfg': needlebench_infer_cfg,
                'eval_cfg': needlebench_eval_cfg
            }
            _bucket.append(dataset_dict)

# Drop the loop helper so it does not remain as a module-level name that
# aliases one of the dataset lists.
del _bucket
|
@ -0,0 +1,108 @@
|
|||||||
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
|
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||||
|
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||||
|
from opencompass.datasets.needlebench.parallel import NeedleBenchParallelDataset
|
||||||
|
from opencompass.datasets.needlebench.parallel import NeedleBenchParallelEvaluator
|
||||||
|
from opencompass.datasets.needlebench.origin import needlebench_postprocess
|
||||||
|
from opencompass.datasets.needlebench.origin import needlebench_dataset_postprocess
|
||||||
|
import math
|
||||||
|
|
||||||
|
|
||||||
|
def logistic(x, L=100, x0=50, k=0.1):
    """Evaluate a logistic (sigmoid) curve at ``x``, rounded to 3 decimals.

    Args:
        x: Point at which the curve is evaluated.
        L: Upper asymptote of the curve.
        x0: Midpoint; the curve equals ``L / 2`` at ``x == x0``.
        k: Steepness of the curve.

    Returns:
        ``L / (1 + exp(-k * (x - x0)))`` rounded to three decimal places.
    """
    try:
        return round(L / (1 + math.exp(-k * (x - x0))), 3)
    except OverflowError:
        # math.exp() overflows once its argument exceeds roughly 709. The
        # denominator is then effectively infinite, so the curve's value
        # is its lower limit, 0.
        return 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def generate_linear_space(start, end, num):
    """Return ``num`` evenly spaced values from ``start`` to ``end`` inclusive.

    Args:
        start: First value of the sequence.
        end: Last value of the sequence.
        num: Number of values to generate; must be at least 1.

    Returns:
        A list with ``num`` values. When ``num`` is 1 the list is
        ``[start]``.

    Raises:
        ValueError: If ``num`` is smaller than 1.
    """
    if num < 1:
        raise ValueError("num must be at least 1.")
    if num == 1:
        return [start]
    step = (end - start) / (num - 1)
    points = [start + step * i for i in range(num - 1)]
    # Pin the final point to ``end`` instead of computing it: float rounding
    # in ``start + step * (num - 1)`` can land a hair off ``end``, and an
    # int() truncation of such a value would yield a different number.
    # Adding ``0 * step`` keeps the numeric type of the other points.
    points.append(end + 0 * step)
    return points
|
||||||
|
|
||||||
|
|
||||||
|
def generate_depth_percents(intervals, interval_type):
    """Build a list of depth percentages between 0 and 100.

    Args:
        intervals: Number of points to generate.
        interval_type: ``'linear'`` for evenly spaced points, or
            ``'sigmoid'`` to map those points through ``logistic``.

    Returns:
        A list of ``intervals`` depth values.

    Raises:
        ValueError: If ``interval_type`` is neither ``'linear'`` nor
            ``'sigmoid'``.
    """
    # Reject unknown types before doing any work.
    if interval_type not in ('linear', 'sigmoid'):
        raise ValueError('Unsupported interval type')

    depth_points = generate_linear_space(0, 100, intervals)
    if interval_type == 'sigmoid':
        depth_points = [logistic(point) for point in depth_points]
    return depth_points
|
||||||
|
|
||||||
|
|
||||||
|
# Reader config: 'prompt' is the input column, 'answer' the output column.
needlebench_reader_cfg = {'input_columns': ['prompt'], 'output_column': 'answer'}

# Inference config shared by every dataset entry built below.
needlebench_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {'role': 'HUMAN', 'prompt': '{prompt}'},
                {'role': 'BOT', 'prompt': '{answer}\n'},
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer},
}

# Evaluation config shared by every dataset entry built below.
needlebench_eval_cfg = {
    'evaluator': {'type': NeedleBenchParallelEvaluator},
    'pred_postprocessor': {'type': needlebench_postprocess},
    'dataset_postprocessor': {'type': needlebench_dataset_postprocess},
    'pred_role': 'BOT',
}

context_lengths = [20000, 160000, 300000, 440000, 580000, 720000, 860000, 1000000]
# NOTE(review): the two settings below are not read anywhere in this
# section; the depths come from the explicit `depths` list instead.
document_depth_percent_intervals = 20
document_depth_percent_interval_type = "linear"

base_path = './data/needlebench'
file_list = ['PaulGrahamEssays.jsonl']
needlebench_datasets_en = []
needle_file_name = 'needles.jsonl'
depths = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100]

# English: one entry per context length; every entry receives the full
# `depths` list.
for original_context_length in context_lengths:
    dataset_dict = dict(
        abbr=f'Length{original_context_length}_parallel_en_1000k',
        type=NeedleBenchParallelDataset,
        path=base_path,
        needle_file_name=needle_file_name,
        length=original_context_length,
        depths=depths,
        tokenizer_model='gpt-4',
        file_list=file_list,
        num_repeats_per_file=25,
        length_buffer=3000,
        guide=True,
        language='English',
        reader_cfg=needlebench_reader_cfg,
        infer_cfg=needlebench_infer_cfg,
        eval_cfg=needlebench_eval_cfg,
    )
    needlebench_datasets_en.append(dataset_dict)

file_list = ['zh_finance.jsonl']
needlebench_datasets_zh = []

# Chinese: same layout, different corpus and a smaller length buffer.
for original_context_length in context_lengths:
    dataset_dict = dict(
        abbr=f'Length{original_context_length}_parallel_zh_1000k',
        type=NeedleBenchParallelDataset,
        path=base_path,
        needle_file_name=needle_file_name,
        length=original_context_length,
        depths=depths,
        tokenizer_model='gpt-4',
        file_list=file_list,
        num_repeats_per_file=25,
        length_buffer=200,
        guide=True,
        language='Chinese',
        reader_cfg=needlebench_reader_cfg,
        infer_cfg=needlebench_infer_cfg,
        eval_cfg=needlebench_eval_cfg,
    )
    needlebench_datasets_zh.append(dataset_dict)
|
@ -0,0 +1,109 @@
|
|||||||
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
|
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||||
|
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||||
|
from opencompass.datasets.needlebench.origin import NeedleBenchOriginDataset
|
||||||
|
from opencompass.datasets.needlebench.origin import NeedleBenchOriginEvaluator
|
||||||
|
from opencompass.datasets.needlebench.origin import needlebench_postprocess
|
||||||
|
from opencompass.datasets.needlebench.origin import needlebench_dataset_postprocess
|
||||||
|
import math
|
||||||
|
|
||||||
|
|
||||||
|
def logistic(x, L=100, x0=50, k=0.1):
    """Evaluate a logistic (sigmoid) curve at ``x``, rounded to 3 decimals.

    Args:
        x: Point at which the curve is evaluated.
        L: Upper asymptote of the curve.
        x0: Midpoint; the curve equals ``L / 2`` at ``x == x0``.
        k: Steepness of the curve.

    Returns:
        ``L / (1 + exp(-k * (x - x0)))`` rounded to three decimal places.
    """
    try:
        return round(L / (1 + math.exp(-k * (x - x0))), 3)
    except OverflowError:
        # math.exp() overflows once its argument exceeds roughly 709. The
        # denominator is then effectively infinite, so the curve's value
        # is its lower limit, 0.
        return 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def generate_linear_space(start, end, num):
    """Return ``num`` evenly spaced values from ``start`` to ``end`` inclusive.

    Args:
        start: First value of the sequence.
        end: Last value of the sequence.
        num: Number of values to generate; must be at least 1.

    Returns:
        A list with ``num`` values. When ``num`` is 1 the list is
        ``[start]``.

    Raises:
        ValueError: If ``num`` is smaller than 1.
    """
    if num < 1:
        raise ValueError("num must be at least 1.")
    if num == 1:
        return [start]
    step = (end - start) / (num - 1)
    points = [start + step * i for i in range(num - 1)]
    # Pin the final point to ``end`` instead of computing it: float rounding
    # in ``start + step * (num - 1)`` can land a hair off ``end``, and an
    # int() truncation of such a value would yield a different number.
    # Adding ``0 * step`` keeps the numeric type of the other points.
    points.append(end + 0 * step)
    return points
|
||||||
|
|
||||||
|
|
||||||
|
def generate_depth_percents(intervals, interval_type):
    """Build a list of depth percentages between 0 and 100.

    Args:
        intervals: Number of points to generate.
        interval_type: ``'linear'`` for evenly spaced points, or
            ``'sigmoid'`` to map those points through ``logistic``.

    Returns:
        A list of ``intervals`` depth values.

    Raises:
        ValueError: If ``interval_type`` is neither ``'linear'`` nor
            ``'sigmoid'``.
    """
    # Reject unknown types before doing any work.
    if interval_type not in ('linear', 'sigmoid'):
        raise ValueError('Unsupported interval type')

    depth_points = generate_linear_space(0, 100, intervals)
    if interval_type == 'sigmoid':
        depth_points = [logistic(point) for point in depth_points]
    return depth_points
|
||||||
|
|
||||||
|
|
||||||
|
# Reader config: 'prompt' is the input column, 'answer' the output column.
needlebench_reader_cfg = {'input_columns': ['prompt'], 'output_column': 'answer'}

# Inference config shared by every dataset entry built below.
needlebench_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {'role': 'HUMAN', 'prompt': '{prompt}'},
                {'role': 'BOT', 'prompt': '{answer}\n'},
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer},
}

# Evaluation config shared by every dataset entry built below.
needlebench_eval_cfg = {
    'evaluator': {'type': NeedleBenchOriginEvaluator},
    'pred_postprocessor': {'type': needlebench_postprocess},
    'dataset_postprocessor': {'type': needlebench_dataset_postprocess},
    'pred_role': 'BOT',
}

context_lengths = [20000, 160000, 300000, 440000, 580000, 720000, 860000, 1000000]
depths_list = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100]

base_path = './data/needlebench'
file_list = ['PaulGrahamEssays.jsonl']
needlebench_datasets_en = []
needle_file_name = 'needles.jsonl'

# English: one entry per (context length, depth) pair.
for original_context_length in context_lengths:
    for depth_percent in depths_list:
        dataset_dict = dict(
            abbr=f'Length{original_context_length}'
            f'Depth{int(depth_percent)}_origin_en_1000k',
            type=NeedleBenchOriginDataset,
            path=base_path,
            length=original_context_length,
            depth=int(depth_percent),
            tokenizer_model='gpt-4',
            file_list=file_list,
            num_repeats_per_file=10,
            length_buffer=600,
            guide=True,
            language='English',
            needle_file_name=needle_file_name,
            reader_cfg=needlebench_reader_cfg,
            infer_cfg=needlebench_infer_cfg,
            eval_cfg=needlebench_eval_cfg,
        )
        needlebench_datasets_en.append(dataset_dict)

file_list = ['zh_finance.jsonl']
needlebench_datasets_zh = []
needle_file_name = 'needles.jsonl'

# Chinese: same layout, different corpus and a smaller length buffer.
for original_context_length in context_lengths:
    for depth_percent in depths_list:
        dataset_dict = dict(
            abbr=f'Length{original_context_length}'
            f'Depth{int(depth_percent)}_origin_zh_1000k',
            type=NeedleBenchOriginDataset,
            path=base_path,
            length=original_context_length,
            depth=int(depth_percent),
            tokenizer_model='gpt-4',
            file_list=file_list,
            num_repeats_per_file=10,
            length_buffer=200,
            guide=True,
            language='Chinese',
            needle_file_name=needle_file_name,
            reader_cfg=needlebench_reader_cfg,
            infer_cfg=needlebench_infer_cfg,
            eval_cfg=needlebench_eval_cfg,
        )
        needlebench_datasets_zh.append(dataset_dict)
|
@ -16,6 +16,7 @@ configs/datasets/needlebench/
|
|||||||
├── needlebench_32k
|
├── needlebench_32k
|
||||||
├── needlebench_128k
|
├── needlebench_128k
|
||||||
├── needlebench_200k
|
├── needlebench_200k
|
||||||
|
├── needlebench_1000k
|
||||||
├── needlebench.py
|
├── needlebench.py
|
||||||
├── readme.md
|
├── readme.md
|
||||||
└── readme_zh-CN.md
|
└── readme_zh-CN.md
|
||||||
|
@ -16,6 +16,7 @@ configs/datasets/needlebench/
|
|||||||
├── needlebench_32k
|
├── needlebench_32k
|
||||||
├── needlebench_128k
|
├── needlebench_128k
|
||||||
├── needlebench_200k
|
├── needlebench_200k
|
||||||
|
├── needlebench_1000k
|
||||||
├── needlebench.py
|
├── needlebench.py
|
||||||
├── readme.md
|
├── readme.md
|
||||||
└── readme_zh-CN.md
|
└── readme_zh-CN.md
|
||||||
|
33
configs/models/hf_internlm/lmdeploy_internlm2_chat_20b.py
Normal file
33
configs/models/hf_internlm/lmdeploy_internlm2_chat_20b.py
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
from opencompass.models.turbomind import TurboMindModel
|
||||||
|
|
||||||
|
|
||||||
|
# Chat template: each turn is wrapped in <|im_start|>{role} ... <|im_end|>.
_meta_template = {
    'round': [
        {
            'role': 'HUMAN',
            'begin': '<|im_start|>user\n',
            'end': '<|im_end|>\n',
        },
        {
            'role': 'BOT',
            'begin': '<|im_start|>assistant\n',
            'end': '<|im_end|>\n',
            'generate': True,
        },
    ],
    'eos_token_id': 92542,
}

# TurboMind configuration for internlm2-chat-20b, spread over 2 GPUs
# (engine `tp=2`, `num_gpus=2`).
models = [
    {
        'type': TurboMindModel,
        'abbr': 'internlm2-chat-20b-turbomind',
        'path': "internlm/internlm2-chat-20b",
        'meta_template': _meta_template,
        'engine_config': {
            'session_len': 210000,
            'max_batch_size': 8,
            'rope_scaling_factor': 3.0,
            'model_name': "internlm2-chat-20b",
            'tp': 2,
        },
        'gen_config': {
            'top_k': 1,
            'top_p': 0.8,
            'temperature': 1.0,
            'max_new_tokens': 2000,
        },
        'max_out_len': 2000,
        'max_seq_len': 210000,
        'batch_size': 1,
        'concurrency': 8,
        'run_cfg': {'num_gpus': 2, 'num_procs': 1},
    },
]
|
32
configs/models/hf_internlm/lmdeploy_internlm2_chat_7b.py
Normal file
32
configs/models/hf_internlm/lmdeploy_internlm2_chat_7b.py
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
from opencompass.models.turbomind import TurboMindModel
|
||||||
|
|
||||||
|
|
||||||
|
# Chat template: each turn is wrapped in <|im_start|>{role} ... <|im_end|>.
_meta_template = {
    'round': [
        {
            'role': 'HUMAN',
            'begin': '<|im_start|>user\n',
            'end': '<|im_end|>\n',
        },
        {
            'role': 'BOT',
            'begin': '<|im_start|>assistant\n',
            'end': '<|im_end|>\n',
            'generate': True,
        },
    ],
    'eos_token_id': 92542,
}

# TurboMind configuration for internlm2-chat-7b on a single GPU.
models = [
    {
        'type': TurboMindModel,
        'abbr': 'internlm2-chat-7b-turbomind',
        'path': "internlm/internlm2-chat-7b",
        'meta_template': _meta_template,
        'engine_config': {
            'session_len': 210000,
            'max_batch_size': 8,
            'rope_scaling_factor': 2.0,
            'model_name': "internlm2-chat-7b",
        },
        'gen_config': {
            'top_k': 1,
            'top_p': 0.8,
            'temperature': 1.0,
            'max_new_tokens': 2000,
        },
        'max_out_len': 2000,
        'max_seq_len': 210000,
        'batch_size': 8,
        'concurrency': 8,
        'run_cfg': {'num_gpus': 1, 'num_procs': 1},
    },
]
|
25
configs/models/qwen/vllm_qwen_14b_chat.py
Normal file
25
configs/models/qwen/vllm_qwen_14b_chat.py
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
from opencompass.models import VLLM
|
||||||
|
|
||||||
|
|
||||||
|
# Chat template: each turn opens with "\n<|im_start|>{role}\n" and closes
# with "<|im_end|>".
_meta_template = {
    'round': [
        {
            'role': "HUMAN",
            'begin': '\n<|im_start|>user\n',
            'end': '<|im_end|>',
        },
        {
            'role': "BOT",
            'begin': "\n<|im_start|>assistant\n",
            'end': '<|im_end|>',
            'generate': True,
        },
    ],
}

# vLLM configuration for Qwen-14B-Chat with tensor parallelism over 4 GPUs.
models = [
    {
        'type': VLLM,
        'abbr': 'qwen-14b-chat-vllm',
        'path': "Qwen/Qwen-14B-Chat",
        'model_kwargs': {'tensor_parallel_size': 4},
        'meta_template': _meta_template,
        'max_out_len': 100,
        'max_seq_len': 2048,
        'batch_size': 32,
        'generation_kwargs': {'temperature': 0},
        'end_str': '<|im_end|>',
        'run_cfg': {'num_gpus': 4, 'num_procs': 1},
    },
]
|
@ -539,8 +539,114 @@ needlebench_200k_summarizer = dict(
|
|||||||
],
|
],
|
||||||
summary_groups=needlebench_summary_groups,
|
summary_groups=needlebench_summary_groups,
|
||||||
)
|
)
|
||||||
context_lengths_8k = list(range(5000, 9000, 1000))
|
|
||||||
|
|
||||||
|
# ----------NeedleBench-1000k-summarizer----------

context_lengths_1000k = [20000, 160000, 300000, 440000, 580000, 720000, 860000, 1000000]

# Dataset abbreviations, grouped by task variant and language.
_needlebench_1000k_2needle_en = []
_needlebench_1000k_3needle_en = []
_needlebench_1000k_4needle_en = []
_needlebench_1000k_5needle_en = []
_needlebench_1000k_2needle_zh = []
_needlebench_1000k_3needle_zh = []
_needlebench_1000k_4needle_zh = []
_needlebench_1000k_5needle_zh = []
_needlebench_1000k_origin_en = []
_needlebench_1000k_origin_zh = []

# Multi-needle and single-needle abbreviations exist for every
# (context length, depth) pair.
for original_context_length in context_lengths_1000k:
    for depth_percent in depths_list_sparse:
        _needlebench_1000k_2needle_en += [f'Length{original_context_length}Depth{int(depth_percent)}_2needle_en_1000k']
        _needlebench_1000k_3needle_en += [f'Length{original_context_length}Depth{int(depth_percent)}_3needle_en_1000k']
        _needlebench_1000k_4needle_en += [f'Length{original_context_length}Depth{int(depth_percent)}_4needle_en_1000k']
        _needlebench_1000k_5needle_en += [f'Length{original_context_length}Depth{int(depth_percent)}_5needle_en_1000k']
        _needlebench_1000k_2needle_zh += [f'Length{original_context_length}Depth{int(depth_percent)}_2needle_zh_1000k']
        _needlebench_1000k_3needle_zh += [f'Length{original_context_length}Depth{int(depth_percent)}_3needle_zh_1000k']
        _needlebench_1000k_4needle_zh += [f'Length{original_context_length}Depth{int(depth_percent)}_4needle_zh_1000k']
        _needlebench_1000k_5needle_zh += [f'Length{original_context_length}Depth{int(depth_percent)}_5needle_zh_1000k']

        _needlebench_1000k_origin_en += [f'Length{original_context_length}Depth{int(depth_percent)}_origin_en_1000k']
        _needlebench_1000k_origin_zh += [f'Length{original_context_length}Depth{int(depth_percent)}_origin_zh_1000k']

# Aggregate lists per language and per task variant.
_needlebench_1000k_multi_needle_en = (_needlebench_1000k_2needle_en
                                      + _needlebench_1000k_3needle_en
                                      + _needlebench_1000k_4needle_en
                                      + _needlebench_1000k_5needle_en)
_needlebench_1000k_multi_needle_zh = (_needlebench_1000k_2needle_zh
                                      + _needlebench_1000k_3needle_zh
                                      + _needlebench_1000k_4needle_zh
                                      + _needlebench_1000k_5needle_zh)
_needlebench_1000k_origin = _needlebench_1000k_origin_en + _needlebench_1000k_origin_zh
_needlebench_1000k_multi_needle = _needlebench_1000k_multi_needle_en + _needlebench_1000k_multi_needle_zh

# Parallel abbreviations carry no depth component: one per context length.
_needlebench_1000k_parallel_en = []
_needlebench_1000k_parallel_zh = []
for original_context_length in context_lengths_1000k:
    _needlebench_1000k_parallel_en.append(f'Length{original_context_length}_parallel_en_1000k')
    _needlebench_1000k_parallel_zh.append(f'Length{original_context_length}_parallel_zh_1000k')
_needlebench_1000k_parallel = _needlebench_1000k_parallel_en + _needlebench_1000k_parallel_zh

needlebench_summary_groups = [
    dict(name='original_version', subsets=_needlebench_1000k_origin),
    dict(name='original_version_zh', subsets=_needlebench_1000k_origin_zh),
    dict(name='original_version_en', subsets=_needlebench_1000k_origin_en),

    dict(name='multi_needle_en', subsets=_needlebench_1000k_multi_needle_en),
    dict(name='multi_needle2_en', subsets=_needlebench_1000k_2needle_en),
    dict(name='multi_needle3_en', subsets=_needlebench_1000k_3needle_en),
    dict(name='multi_needle4_en', subsets=_needlebench_1000k_4needle_en),
    dict(name='multi_needle5_en', subsets=_needlebench_1000k_5needle_en),

    dict(name='multi_needle_zh', subsets=_needlebench_1000k_multi_needle_zh),
    dict(name='multi_needle2_zh', subsets=_needlebench_1000k_2needle_zh),
    dict(name='multi_needle3_zh', subsets=_needlebench_1000k_3needle_zh),
    dict(name='multi_needle4_zh', subsets=_needlebench_1000k_4needle_zh),
    dict(name='multi_needle5_zh', subsets=_needlebench_1000k_5needle_zh),

    dict(name='multi_needle', subsets=_needlebench_1000k_multi_needle),

    dict(name='parallel_version', subsets=_needlebench_1000k_parallel),
    dict(name='parallel_version_zh', subsets=_needlebench_1000k_parallel_zh),
    dict(name='parallel_version_en', subsets=_needlebench_1000k_parallel_en),

    # Weighted combination of the three task variants (weights sum to 1.0).
    dict(
        name='overall',
        subsets=[
            ['original_version', 'naive_average'],
            ['multi_needle', 'naive_average'],
            ['parallel_version', 'average_score'],
        ],
        weights={
            'original_version': 0.4,
            'multi_needle': 0.3,
            'parallel_version': 0.3,
        },
    ),
]

needlebench_1000k_summarizer = {
    'type': NeedleBenchSummarizer,
    'dataset_abbrs': [
        'overall',
        '--------- NeedleBench-1000k Single-Needle ---------',  # category
        'original_version',
        'original_version_zh',
        'original_version_en',
        '--------- NeedleBench-1000k Parallel-Needles ---------',  # category
        'parallel_version',
        'parallel_version_zh',
        'parallel_version_en',
        '--------- NeedleBench-1000k Multi-Needles ---------',  # category
        'multi_needle',
        'multi_needle_en',
        'multi_needle_zh',
        'multi_needle2_en',
        'multi_needle3_en',
        'multi_needle4_en',
        'multi_needle5_en',
        'multi_needle2_zh',
        'multi_needle3_zh',
        'multi_needle4_zh',
        'multi_needle5_zh',

        # Left disabled; enabling it would also list every individual subset:
        # *_needlebench_1000k_origin, *_needlebench_1000k_multi_needle, *_needlebench_1000k_parallel,
    ],
    'summary_groups': needlebench_summary_groups,
}
|
||||||
|
|
||||||
|
context_lengths_8k = list(range(5000, 9000, 1000))
|
||||||
# Repeating the same process for parallel (assuming it's similar to origin_en)
|
# Repeating the same process for parallel (assuming it's similar to origin_en)
|
||||||
_needlebench_8k_parallel_en_batch1 = []
|
_needlebench_8k_parallel_en_batch1 = []
|
||||||
_needlebench_8k_parallel_en_batch5 = []
|
_needlebench_8k_parallel_en_batch5 = []
|
||||||
|
@ -48,6 +48,7 @@ class NeedleBenchMultiDataset(BaseDataset):
|
|||||||
needle_file_name: str,
|
needle_file_name: str,
|
||||||
num_needles: int,
|
num_needles: int,
|
||||||
diff: int,
|
diff: int,
|
||||||
|
position: str = 'End',
|
||||||
):
|
):
|
||||||
data = {'prompt': [], 'answer': []}
|
data = {'prompt': [], 'answer': []}
|
||||||
tokenizer = tiktoken.encoding_for_model(tokenizer_model)
|
tokenizer = tiktoken.encoding_for_model(tokenizer_model)
|
||||||
@ -109,19 +110,42 @@ class NeedleBenchMultiDataset(BaseDataset):
|
|||||||
retrieval_question)
|
retrieval_question)
|
||||||
|
|
||||||
if language == 'Chinese':
|
if language == 'Chinese':
|
||||||
|
if position == 'End':
|
||||||
prompt = ('你是一个善于回答用户问题的智能AI助手\n'
|
prompt = ('你是一个善于回答用户问题的智能AI助手\n'
|
||||||
'请保持你的回答简洁清楚。不要说和下面文档中的无关的话'
|
'请保持你的回答简洁清楚。不要说和下面文档中的无关的话'
|
||||||
',或重复你的回答\n'
|
',或重复你的回答\n'
|
||||||
f'用户现在给你的文档是{context}\n\n'
|
f'用户现在给你的文档是{context}\n\n'
|
||||||
f'现在请问:{retrieval_question}')
|
f'现在请问:{retrieval_question}')
|
||||||
|
elif position == 'Start':
|
||||||
|
prompt = ('你是一个善于回答用户问题的智能AI助手\n'
|
||||||
|
'请保持你的回答简洁清楚。不要说和下面文档中的无关的话'
|
||||||
|
',或重复你的回答\n'
|
||||||
|
f'现在请问:{retrieval_question}\n\n'
|
||||||
|
f'用户现在给你的文档是{context}\n\n')
|
||||||
|
else:
|
||||||
|
raise ValueError('Unsupported position. '
|
||||||
|
'Position must be "End" or "Start".')
|
||||||
elif language == 'English':
|
elif language == 'English':
|
||||||
|
if position == 'End':
|
||||||
prompt = ('You are an intelligent AI assistant skilled in '
|
prompt = ('You are an intelligent AI assistant skilled in '
|
||||||
'answering user questions.\n'
|
'answering user questions.\n'
|
||||||
'Please keep your answers concise and clear. Do not'
|
'Please keep your answers concise and clear. Do '
|
||||||
' talk about irrelevant topics or repeat your '
|
'not talk about irrelevant topics or repeat '
|
||||||
'answers.\n'
|
'your answers.\nThe document '
|
||||||
f'The document given to you by the user is {context}'
|
f'given to you by the user is {context}\n\n'
|
||||||
f'\n\nNow, the question is: {retrieval_question}')
|
f'Now, the question is: {retrieval_question}')
|
||||||
|
elif position == 'Start':
|
||||||
|
prompt = ('You are an intelligent AI assistant skilled in '
|
||||||
|
'answering user questions.\n'
|
||||||
|
'Please keep your answers concise and clear. Do '
|
||||||
|
'not talk about irrelevant topics or repeat '
|
||||||
|
'your answers.\n'
|
||||||
|
f'Now, the question is: {retrieval_question}\n\n'
|
||||||
|
'The document given to you by the user'
|
||||||
|
f' is {context}\n\n')
|
||||||
|
else:
|
||||||
|
raise ValueError('Unsupported position. '
|
||||||
|
'Position must be "End" or "Start".')
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Language '{language}' is not supported.")
|
raise ValueError(f"Language '{language}' is not supported.")
|
||||||
|
|
||||||
|
@ -45,6 +45,7 @@ class NeedleBenchOriginDataset(BaseDataset):
|
|||||||
guide: bool,
|
guide: bool,
|
||||||
language: str,
|
language: str,
|
||||||
needle_file_name: str,
|
needle_file_name: str,
|
||||||
|
position: str = 'End',
|
||||||
):
|
):
|
||||||
data = {'prompt': [], 'answer': []}
|
data = {'prompt': [], 'answer': []}
|
||||||
tokenizer = tiktoken.encoding_for_model(tokenizer_model)
|
tokenizer = tiktoken.encoding_for_model(tokenizer_model)
|
||||||
@ -85,19 +86,42 @@ class NeedleBenchOriginDataset(BaseDataset):
|
|||||||
retrieval_question)
|
retrieval_question)
|
||||||
|
|
||||||
if language == 'Chinese':
|
if language == 'Chinese':
|
||||||
|
if position == 'End':
|
||||||
prompt = ('你是一个善于回答用户问题的智能AI助手\n'
|
prompt = ('你是一个善于回答用户问题的智能AI助手\n'
|
||||||
'请保持你的回答简洁清楚。不要说和下面文档中的无关的话'
|
'请保持你的回答简洁清楚。不要说和下面文档中的无关的话'
|
||||||
',或重复你的回答\n'
|
',或重复你的回答\n'
|
||||||
f'用户现在给你的文档是{context}\n\n'
|
f'用户现在给你的文档是{context}\n\n'
|
||||||
f'现在请问:{retrieval_question}')
|
f'现在请问:{retrieval_question}')
|
||||||
|
elif position == 'Start':
|
||||||
|
prompt = ('你是一个善于回答用户问题的智能AI助手\n'
|
||||||
|
'请保持你的回答简洁清楚。不要说和下面文档中的无关的话'
|
||||||
|
',或重复你的回答\n'
|
||||||
|
f'现在请问:{retrieval_question}\n\n'
|
||||||
|
f'用户现在给你的文档是{context}\n\n')
|
||||||
|
else:
|
||||||
|
raise ValueError('Unsupported position. '
|
||||||
|
'Position must be "End" or "Start".')
|
||||||
elif language == 'English':
|
elif language == 'English':
|
||||||
|
if position == 'End':
|
||||||
prompt = ('You are an intelligent AI assistant skilled in '
|
prompt = ('You are an intelligent AI assistant skilled in '
|
||||||
'answering user questions.\n'
|
'answering user questions.\n'
|
||||||
'Please keep your answers concise and clear. Do not'
|
'Please keep your answers concise and clear. Do '
|
||||||
' talk about irrelevant topics or repeat your '
|
'not talk about irrelevant topics or repeat '
|
||||||
'answers.\n'
|
'your answers.\nThe document '
|
||||||
f'The document given to you by the user is {context}'
|
f'given to you by the user is {context}\n\n'
|
||||||
f'\n\nNow, the question is: {retrieval_question}')
|
f'Now, the question is: {retrieval_question}')
|
||||||
|
elif position == 'Start':
|
||||||
|
prompt = ('You are an intelligent AI assistant skilled in '
|
||||||
|
'answering user questions.\n'
|
||||||
|
'Please keep your answers concise and clear. Do '
|
||||||
|
'not talk about irrelevant topics or repeat '
|
||||||
|
'your answers.\n'
|
||||||
|
f'Now, the question is: {retrieval_question}\n\n'
|
||||||
|
'The document given to you by the user'
|
||||||
|
f' is {context}\n\n')
|
||||||
|
else:
|
||||||
|
raise ValueError('Unsupported position. '
|
||||||
|
'Position must be "End" or "Start".')
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Language '{language}' is not supported.")
|
raise ValueError(f"Language '{language}' is not supported.")
|
||||||
|
|
||||||
|
@ -67,6 +67,7 @@ class NeedleBenchParallelDataset(BaseDataset):
|
|||||||
length_buffer: int,
|
length_buffer: int,
|
||||||
guide: bool,
|
guide: bool,
|
||||||
language: str,
|
language: str,
|
||||||
|
position: str = 'End',
|
||||||
):
|
):
|
||||||
data = {'prompt': [], 'answer': []}
|
data = {'prompt': [], 'answer': []}
|
||||||
tokenizer = tiktoken.encoding_for_model(tokenizer_model)
|
tokenizer = tiktoken.encoding_for_model(tokenizer_model)
|
||||||
@ -134,12 +135,24 @@ class NeedleBenchParallelDataset(BaseDataset):
|
|||||||
retrieval_question)
|
retrieval_question)
|
||||||
|
|
||||||
if language == 'Chinese':
|
if language == 'Chinese':
|
||||||
|
if position == 'End':
|
||||||
prompt = ('你是一个善于回答用户问题的智能AI助手\n'
|
prompt = ('你是一个善于回答用户问题的智能AI助手\n'
|
||||||
'请保持你的回答简洁清楚。不要说和下面文档中的无关的话'
|
'请保持你的回答简洁清楚。不要说和下面文档中的无关的话'
|
||||||
',或重复你的回答\n请先仔细阅读下面的文档再依次回答'
|
',或重复你的回答\n请先仔细阅读下面的文档再依次回答'
|
||||||
f'最后提出的问题\n用户现在给你的文档是{context}\n\n'
|
f'最后提出的问题\n用户现在给你的文档是{context}\n\n'
|
||||||
f'现在请问:{retrieval_question}\n')
|
f'现在请问:{retrieval_question}\n')
|
||||||
|
elif position == 'Start':
|
||||||
|
prompt = ('你是一个善于回答用户问题的智能AI助手\n'
|
||||||
|
'请保持你的回答简洁清楚。不要说和下面文档中的无关的话'
|
||||||
|
',或重复你的回答\n请先仔细阅读下面的文档再依次回答'
|
||||||
|
f'最后提出的问题\n现在请问:{retrieval_question}\n\n'
|
||||||
|
f'用户现在给你的文档是{context}\n')
|
||||||
|
else:
|
||||||
|
raise ValueError('Unsupported position. '
|
||||||
|
'Position must be "End" or "Start".')
|
||||||
|
|
||||||
elif language == 'English':
|
elif language == 'English':
|
||||||
|
if position == 'End':
|
||||||
prompt = (
|
prompt = (
|
||||||
'You are an intelligent AI assistant skilled in '
|
'You are an intelligent AI assistant skilled in '
|
||||||
'answering user questions.\n'
|
'answering user questions.\n'
|
||||||
@ -148,6 +161,18 @@ class NeedleBenchParallelDataset(BaseDataset):
|
|||||||
'answers.\n'
|
'answers.\n'
|
||||||
f'The document given to you by the user is {context}'
|
f'The document given to you by the user is {context}'
|
||||||
f'\n\nNow, the questions are: {retrieval_question}\n')
|
f'\n\nNow, the questions are: {retrieval_question}\n')
|
||||||
|
elif position == 'Start':
|
||||||
|
prompt = (
|
||||||
|
'You are an intelligent AI assistant skilled in '
|
||||||
|
'answering user questions.\n'
|
||||||
|
'Please keep your answers concise and clear. Do not'
|
||||||
|
' talk about irrelevant topics or repeat your '
|
||||||
|
'answers.\n'
|
||||||
|
f'\nNow, the questions are: {retrieval_question}\n\n'
|
||||||
|
f'The document given to you by the user is {context}')
|
||||||
|
else:
|
||||||
|
raise ValueError('Unsupported position. '
|
||||||
|
'Position must be "End" or "Start".')
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Language '{language}' is not supported.")
|
raise ValueError(f"Language '{language}' is not supported.")
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user