# Review note (text before the first "diff --git" header is ignored by
# `git apply` / `patch`).
#
# This patch adds a `position` argument ('End' or 'Start', default 'End') to
# the NeedleBench dataset loaders; it selects whether the retrieval question
# is placed after or before the document in the generated prompt.
#
# Fixes applied to the added lines of the submitted patch:
#   * multi.py / origin.py, Chinese 'Start' branch: removed a stray comma
#     that turned `prompt` into a 2-tuple of strings instead of one string.
#   * multi.py / origin.py, 'Start' branches: added a "\n\n" separator so the
#     question no longer runs straight into the document text.
#   * parallel.py: the 'Start' checks are now `elif`. With two separate `if`
#     statements the default position 'End' fell through to the `else` and
#     raised ValueError in the Chinese branch.
#   * parallel.py, English branch: added the missing `else: raise ValueError`
#     so an unsupported position fails explicitly instead of leaving `prompt`
#     unassigned.
#
# NOTE(review): the `index` blob hashes below are those of the submitted
# patch and do not reflect the fixes above. Leading whitespace was
# reconstructed; confirm against the target files before applying.
diff --git a/opencompass/datasets/needlebench/multi.py b/opencompass/datasets/needlebench/multi.py
index 3a2d1157..6f1c4899 100644
--- a/opencompass/datasets/needlebench/multi.py
+++ b/opencompass/datasets/needlebench/multi.py
@@ -48,6 +48,7 @@ class NeedleBenchMultiDataset(BaseDataset):
         needle_file_name: str,
         num_needles: int,
         diff: int,
+        position: str = 'End',
     ):
         data = {'prompt': [], 'answer': []}
         tokenizer = tiktoken.encoding_for_model(tokenizer_model)
@@ -109,19 +110,42 @@ class NeedleBenchMultiDataset(BaseDataset):
                     retrieval_question)
 
             if language == 'Chinese':
-                prompt = ('你是一个善于回答用户问题的智能AI助手\n'
-                          '请保持你的回答简洁清楚。不要说和下面文档中的无关的话'
-                          ',或重复你的回答\n'
-                          f'用户现在给你的文档是{context}\n\n'
-                          f'现在请问:{retrieval_question}')
+                if position == 'End':
+                    prompt = ('你是一个善于回答用户问题的智能AI助手\n'
+                              '请保持你的回答简洁清楚。不要说和下面文档中的无关的话'
+                              ',或重复你的回答\n'
+                              f'用户现在给你的文档是{context}\n\n'
+                              f'现在请问:{retrieval_question}')
+                elif position == 'Start':
+                    prompt = ('你是一个善于回答用户问题的智能AI助手\n'
+                              '请保持你的回答简洁清楚。不要说和下面文档中的无关的话'
+                              ',或重复你的回答\n'
+                              f'现在请问:{retrieval_question}\n\n'
+                              f'用户现在给你的文档是{context}\n\n')
+                else:
+                    raise ValueError('Unsupported position. '
+                                     'Position must be "End" or "Start".')
             elif language == 'English':
-                prompt = ('You are an intelligent AI assistant skilled in '
-                          'answering user questions.\n'
-                          'Please keep your answers concise and clear. Do not'
-                          ' talk about irrelevant topics or repeat your '
-                          'answers.\n'
-                          f'The document given to you by the user is {context}'
-                          f'\n\nNow, the question is: {retrieval_question}')
+                if position == 'End':
+                    prompt = ('You are an intelligent AI assistant skilled in '
+                              'answering user questions.\n'
+                              'Please keep your answers concise and clear. Do '
+                              'not talk about irrelevant topics or repeat '
+                              'your answers.\nThe document '
+                              f'given to you by the user is {context}\n\n'
+                              f'Now, the question is: {retrieval_question}')
+                elif position == 'Start':
+                    prompt = ('You are an intelligent AI assistant skilled in '
+                              'answering user questions.\n'
+                              'Please keep your answers concise and clear. Do '
+                              'not talk about irrelevant topics or repeat '
+                              'your answers.\n'
+                              f'Now, the question is: {retrieval_question}\n\n'
+                              'The document given to you by the user'
+                              f' is {context}\n\n')
+                else:
+                    raise ValueError('Unsupported position. '
+                                     'Position must be "End" or "Start".')
             else:
                 raise ValueError(f"Language '{language}' is not supported.")
 
diff --git a/opencompass/datasets/needlebench/origin.py b/opencompass/datasets/needlebench/origin.py
index 561de1ba..2848dc3a 100644
--- a/opencompass/datasets/needlebench/origin.py
+++ b/opencompass/datasets/needlebench/origin.py
@@ -45,6 +45,7 @@ class NeedleBenchOriginDataset(BaseDataset):
         guide: bool,
         language: str,
         needle_file_name: str,
+        position: str = 'End',
     ):
         data = {'prompt': [], 'answer': []}
         tokenizer = tiktoken.encoding_for_model(tokenizer_model)
@@ -85,19 +86,42 @@ class NeedleBenchOriginDataset(BaseDataset):
                     retrieval_question)
 
             if language == 'Chinese':
-                prompt = ('你是一个善于回答用户问题的智能AI助手\n'
-                          '请保持你的回答简洁清楚。不要说和下面文档中的无关的话'
-                          ',或重复你的回答\n'
-                          f'用户现在给你的文档是{context}\n\n'
-                          f'现在请问:{retrieval_question}')
+                if position == 'End':
+                    prompt = ('你是一个善于回答用户问题的智能AI助手\n'
+                              '请保持你的回答简洁清楚。不要说和下面文档中的无关的话'
+                              ',或重复你的回答\n'
+                              f'用户现在给你的文档是{context}\n\n'
+                              f'现在请问:{retrieval_question}')
+                elif position == 'Start':
+                    prompt = ('你是一个善于回答用户问题的智能AI助手\n'
+                              '请保持你的回答简洁清楚。不要说和下面文档中的无关的话'
+                              ',或重复你的回答\n'
+                              f'现在请问:{retrieval_question}\n\n'
+                              f'用户现在给你的文档是{context}\n\n')
+                else:
+                    raise ValueError('Unsupported position. '
+                                     'Position must be "End" or "Start".')
             elif language == 'English':
-                prompt = ('You are an intelligent AI assistant skilled in '
-                          'answering user questions.\n'
-                          'Please keep your answers concise and clear. Do not'
-                          ' talk about irrelevant topics or repeat your '
-                          'answers.\n'
-                          f'The document given to you by the user is {context}'
-                          f'\n\nNow, the question is: {retrieval_question}')
+                if position == 'End':
+                    prompt = ('You are an intelligent AI assistant skilled in '
+                              'answering user questions.\n'
+                              'Please keep your answers concise and clear. Do '
+                              'not talk about irrelevant topics or repeat '
+                              'your answers.\nThe document '
+                              f'given to you by the user is {context}\n\n'
+                              f'Now, the question is: {retrieval_question}')
+                elif position == 'Start':
+                    prompt = ('You are an intelligent AI assistant skilled in '
+                              'answering user questions.\n'
+                              'Please keep your answers concise and clear. Do '
+                              'not talk about irrelevant topics or repeat '
+                              'your answers.\n'
+                              f'Now, the question is: {retrieval_question}\n\n'
+                              'The document given to you by the user'
+                              f' is {context}\n\n')
+                else:
+                    raise ValueError('Unsupported position. '
+                                     'Position must be "End" or "Start".')
             else:
                 raise ValueError(f"Language '{language}' is not supported.")
 
diff --git a/opencompass/datasets/needlebench/parallel.py b/opencompass/datasets/needlebench/parallel.py
index 6133a3ca..2d522737 100644
--- a/opencompass/datasets/needlebench/parallel.py
+++ b/opencompass/datasets/needlebench/parallel.py
@@ -67,6 +67,7 @@ class NeedleBenchParallelDataset(BaseDataset):
         length_buffer: int,
         guide: bool,
         language: str,
+        position: str = 'End',
     ):
         data = {'prompt': [], 'answer': []}
         tokenizer = tiktoken.encoding_for_model(tokenizer_model)
@@ -134,20 +135,43 @@ class NeedleBenchParallelDataset(BaseDataset):
                     retrieval_question)
 
             if language == 'Chinese':
-                prompt = ('你是一个善于回答用户问题的智能AI助手\n'
-                          '请保持你的回答简洁清楚。不要说和下面文档中的无关的话'
-                          ',或重复你的回答\n请先仔细阅读下面的文档再依次回答'
-                          f'最后提出的问题\n用户现在给你的文档是{context}\n\n'
-                          f'现在请问:{retrieval_question}\n')
+                if position == 'End':
+                    prompt = ('你是一个善于回答用户问题的智能AI助手\n'
+                              '请保持你的回答简洁清楚。不要说和下面文档中的无关的话'
+                              ',或重复你的回答\n请先仔细阅读下面的文档再依次回答'
+                              f'最后提出的问题\n用户现在给你的文档是{context}\n\n'
+                              f'现在请问:{retrieval_question}\n')
+                elif position == 'Start':
+                    prompt = ('你是一个善于回答用户问题的智能AI助手\n'
+                              '请保持你的回答简洁清楚。不要说和下面文档中的无关的话'
+                              ',或重复你的回答\n请先仔细阅读下面的文档再依次回答'
+                              f'最后提出的问题\n现在请问:{retrieval_question}\n\n'
+                              f'用户现在给你的文档是{context}\n')
+                else:
+                    raise ValueError('Unsupported position. '
+                                     'Position must be "End" or "Start".')
             elif language == 'English':
-                prompt = (
-                    'You are an intelligent AI assistant skilled in '
-                    'answering user questions.\n'
-                    'Please keep your answers concise and clear. Do not'
-                    ' talk about irrelevant topics or repeat your '
-                    'answers.\n'
-                    f'The document given to you by the user is {context}'
-                    f'\n\nNow, the questions are: {retrieval_question}\n')
+                if position == 'End':
+                    prompt = (
+                        'You are an intelligent AI assistant skilled in '
+                        'answering user questions.\n'
+                        'Please keep your answers concise and clear. Do not'
+                        ' talk about irrelevant topics or repeat your '
+                        'answers.\n'
+                        f'The document given to you by the user is {context}'
+                        f'\n\nNow, the questions are: {retrieval_question}\n')
+                elif position == 'Start':
+                    prompt = (
+                        'You are an intelligent AI assistant skilled in '
+                        'answering user questions.\n'
+                        'Please keep your answers concise and clear. Do not'
+                        ' talk about irrelevant topics or repeat your '
+                        'answers.\n'
+                        f'\nNow, the questions are: {retrieval_question}\n\n'
+                        f'The document given to you by the user is {context}')
+                else:
+                    raise ValueError('Unsupported position. '
+                                     'Position must be "End" or "Start".')
             else:
                 raise ValueError(f"Language '{language}' is not supported.")
 