add prompt position args

This commit is contained in:
DseidLi 2024-03-19 16:02:15 +08:00
parent 367ba1ba61
commit 75425acdf8
3 changed files with 107 additions and 37 deletions

View File

@ -48,6 +48,7 @@ class NeedleBenchMultiDataset(BaseDataset):
needle_file_name: str, needle_file_name: str,
num_needles: int, num_needles: int,
diff: int, diff: int,
position: str = 'End',
): ):
data = {'prompt': [], 'answer': []} data = {'prompt': [], 'answer': []}
tokenizer = tiktoken.encoding_for_model(tokenizer_model) tokenizer = tiktoken.encoding_for_model(tokenizer_model)
@ -109,19 +110,42 @@ class NeedleBenchMultiDataset(BaseDataset):
retrieval_question) retrieval_question)
if language == 'Chinese': if language == 'Chinese':
prompt = ('你是一个善于回答用户问题的智能AI助手\n' if position == 'End':
'请保持你的回答简洁清楚。不要说和下面文档中的无关的话' prompt = ('你是一个善于回答用户问题的智能AI助手\n'
',或重复你的回答\n' '请保持你的回答简洁清楚。不要说和下面文档中的无关的话'
f'用户现在给你的文档是{context}\n\n' ',或重复你的回答\n'
f'现在请问:{retrieval_question}') f'用户现在给你的文档是{context}\n\n'
f'现在请问:{retrieval_question}')
elif position == 'Start':
prompt = ('你是一个善于回答用户问题的智能AI助手\n'
'请保持你的回答简洁清楚。不要说和下面文档中的无关的话'
',或重复你的回答\n'
f'现在请问:{retrieval_question}',
f'用户现在给你的文档是{context}\n\n')
else:
raise ValueError('Unsupported position. '
'Position must be "End" or "Start".')
elif language == 'English': elif language == 'English':
prompt = ('You are an intelligent AI assistant skilled in ' if position == 'End':
'answering user questions.\n' prompt = ('You are an intelligent AI assistant skilled in '
'Please keep your answers concise and clear. Do not' 'answering user questions.\n'
' talk about irrelevant topics or repeat your ' 'Please keep your answers concise and clear. Do '
'answers.\n' 'not talk about irrelevant topics or repeat '
f'The document given to you by the user is {context}' 'your answers.\nThe document '
f'\n\nNow, the question is: {retrieval_question}') f'given to you by the user is {context}\n\n'
f'Now, the question is: {retrieval_question}')
elif position == 'Start':
prompt = ('You are an intelligent AI assistant skilled in '
'answering user questions.\n'
'Please keep your answers concise and clear. Do '
'not talk about irrelevant topics or repeat '
'your answers.\n'
f'Now, the question is: {retrieval_question}'
'The document given to you by the user'
f' is {context}\n\n')
else:
raise ValueError('Unsupported position. '
'Position must be "End" or "Start".')
else: else:
raise ValueError(f"Language '{language}' is not supported.") raise ValueError(f"Language '{language}' is not supported.")

View File

@ -45,6 +45,7 @@ class NeedleBenchOriginDataset(BaseDataset):
guide: bool, guide: bool,
language: str, language: str,
needle_file_name: str, needle_file_name: str,
position: str = 'End',
): ):
data = {'prompt': [], 'answer': []} data = {'prompt': [], 'answer': []}
tokenizer = tiktoken.encoding_for_model(tokenizer_model) tokenizer = tiktoken.encoding_for_model(tokenizer_model)
@ -85,19 +86,42 @@ class NeedleBenchOriginDataset(BaseDataset):
retrieval_question) retrieval_question)
if language == 'Chinese': if language == 'Chinese':
prompt = ('你是一个善于回答用户问题的智能AI助手\n' if position == 'End':
'请保持你的回答简洁清楚。不要说和下面文档中的无关的话' prompt = ('你是一个善于回答用户问题的智能AI助手\n'
',或重复你的回答\n' '请保持你的回答简洁清楚。不要说和下面文档中的无关的话'
f'用户现在给你的文档是{context}\n\n' ',或重复你的回答\n'
f'现在请问:{retrieval_question}') f'用户现在给你的文档是{context}\n\n'
f'现在请问:{retrieval_question}')
elif position == 'Start':
prompt = ('你是一个善于回答用户问题的智能AI助手\n'
'请保持你的回答简洁清楚。不要说和下面文档中的无关的话'
',或重复你的回答\n'
f'现在请问:{retrieval_question}',
f'用户现在给你的文档是{context}\n\n')
else:
raise ValueError('Unsupported position. '
'Position must be "End" or "Start".')
elif language == 'English': elif language == 'English':
prompt = ('You are an intelligent AI assistant skilled in ' if position == 'End':
'answering user questions.\n' prompt = ('You are an intelligent AI assistant skilled in '
'Please keep your answers concise and clear. Do not' 'answering user questions.\n'
' talk about irrelevant topics or repeat your ' 'Please keep your answers concise and clear. Do '
'answers.\n' 'not talk about irrelevant topics or repeat '
f'The document given to you by the user is {context}' 'your answers.\nThe document '
f'\n\nNow, the question is: {retrieval_question}') f'given to you by the user is {context}\n\n'
f'Now, the question is: {retrieval_question}')
elif position == 'Start':
prompt = ('You are an intelligent AI assistant skilled in '
'answering user questions.\n'
'Please keep your answers concise and clear. Do '
'not talk about irrelevant topics or repeat '
'your answers.\n'
f'Now, the question is: {retrieval_question}'
'The document given to you by the user'
f' is {context}\n\n')
else:
raise ValueError('Unsupported position. '
'Position must be "End" or "Start".')
else: else:
raise ValueError(f"Language '{language}' is not supported.") raise ValueError(f"Language '{language}' is not supported.")

View File

@ -67,6 +67,7 @@ class NeedleBenchParallelDataset(BaseDataset):
length_buffer: int, length_buffer: int,
guide: bool, guide: bool,
language: str, language: str,
position: str = 'End',
): ):
data = {'prompt': [], 'answer': []} data = {'prompt': [], 'answer': []}
tokenizer = tiktoken.encoding_for_model(tokenizer_model) tokenizer = tiktoken.encoding_for_model(tokenizer_model)
@ -134,20 +135,41 @@ class NeedleBenchParallelDataset(BaseDataset):
retrieval_question) retrieval_question)
if language == 'Chinese': if language == 'Chinese':
prompt = ('你是一个善于回答用户问题的智能AI助手\n' if position == 'End':
'请保持你的回答简洁清楚。不要说和下面文档中的无关的话' prompt = ('你是一个善于回答用户问题的智能AI助手\n'
',或重复你的回答\n请先仔细阅读下面的文档再依次回答' '请保持你的回答简洁清楚。不要说和下面文档中的无关的话'
f'最后提出的问题\n用户现在给你的文档是{context}\n\n' ',或重复你的回答\n请先仔细阅读下面的文档再依次回答'
f'现在请问:{retrieval_question}\n') f'最后提出的问题\n用户现在给你的文档是{context}\n\n'
f'现在请问:{retrieval_question}\n')
if position == 'Start':
prompt = ('你是一个善于回答用户问题的智能AI助手\n'
'请保持你的回答简洁清楚。不要说和下面文档中的无关的话'
',或重复你的回答\n请先仔细阅读下面的文档再依次回答'
f'最后提出的问题\n现在请问:{retrieval_question}\n\n'
f'用户现在给你的文档是{context}\n')
else:
raise ValueError('Unsupported position. '
'Position must be "End" or "Start".')
elif language == 'English': elif language == 'English':
prompt = ( if position == 'End':
'You are an intelligent AI assistant skilled in ' prompt = (
'answering user questions.\n' 'You are an intelligent AI assistant skilled in '
'Please keep your answers concise and clear. Do not' 'answering user questions.\n'
' talk about irrelevant topics or repeat your ' 'Please keep your answers concise and clear. Do not'
'answers.\n' ' talk about irrelevant topics or repeat your '
f'The document given to you by the user is {context}' 'answers.\n'
f'\n\nNow, the questions are: {retrieval_question}\n') f'The document given to you by the user is {context}'
f'\n\nNow, the questions are: {retrieval_question}\n')
if position == 'Start':
prompt = (
'You are an intelligent AI assistant skilled in '
'answering user questions.\n'
'Please keep your answers concise and clear. Do not'
' talk about irrelevant topics or repeat your '
'answers.\n'
f'\nNow, the questions are: {retrieval_question}\n\n'
f'The document given to you by the user is {context}')
else: else:
raise ValueError(f"Language '{language}' is not supported.") raise ValueError(f"Language '{language}' is not supported.")