add prompt position args

2025-05-30 16:03:24 +08:00 · 2024-03-19 16:02:15 +08:00 · 2024-03-19 16:02:15 +08:00 · 75425acdf8
commit 75425acdf8
parent 367ba1ba61
3 changed files with 107 additions and 37 deletions
--- a/opencompass/datasets/needlebench/multi.py
+++ b/opencompass/datasets/needlebench/multi.py
@ -48,6 +48,7 @@ class NeedleBenchMultiDataset(BaseDataset):
        needle_file_name: str,
        num_needles: int,
        diff: int,
+        position: str = 'End',
    ):
        data = {'prompt': [], 'answer': []}
        tokenizer = tiktoken.encoding_for_model(tokenizer_model)
@ -109,19 +110,42 @@ class NeedleBenchMultiDataset(BaseDataset):
                    retrieval_question)

            if language == 'Chinese':
-                prompt = ('你是一个善于回答用户问题的智能AI助手\n'
-                          '请保持你的回答简洁清楚。不要说和下面文档中的无关的话'
-                          '，或重复你的回答\n'
-                          f'用户现在给你的文档是{context}\n\n'
-                          f'现在请问：{retrieval_question}')
+                if position == 'End':  # question follows the document
+                    prompt = ('你是一个善于回答用户问题的智能AI助手\n'
+                              '请保持你的回答简洁清楚。不要说和下面文档中的无关的话'
+                              '，或重复你的回答\n'
+                              f'用户现在给你的文档是{context}\n\n'
+                              f'现在请问：{retrieval_question}')
+                elif position == 'Start':  # question precedes the document
+                    prompt = ('你是一个善于回答用户问题的智能AI助手\n'
+                              '请保持你的回答简洁清楚。不要说和下面文档中的无关的话'
+                              '，或重复你的回答\n'
+                              f'现在请问：{retrieval_question}\n\n'
+                              f'用户现在给你的文档是{context}\n\n')
+                else:
+                    raise ValueError('Unsupported position. '
+                                     'Position must be "End" or "Start".')
            elif language == 'English':
-                prompt = ('You are an intelligent AI assistant skilled in '
-                          'answering user questions.\n'
-                          'Please keep your answers concise and clear. Do not'
-                          ' talk about irrelevant topics or repeat your '
-                          'answers.\n'
-                          f'The document given to you by the user is {context}'
-                          f'\n\nNow, the question is: {retrieval_question}')
+                if position == 'End':  # question follows the document
+                    prompt = ('You are an intelligent AI assistant skilled in '
+                              'answering user questions.\n'
+                              'Please keep your answers concise and clear. Do '
+                              'not talk about irrelevant topics or repeat '
+                              'your answers.\nThe document '
+                              f'given to you by the user is {context}\n\n'
+                              f'Now, the question is: {retrieval_question}')
+                elif position == 'Start':  # question precedes the document
+                    prompt = ('You are an intelligent AI assistant skilled in '
+                              'answering user questions.\n'
+                              'Please keep your answers concise and clear. Do '
+                              'not talk about irrelevant topics or repeat '
+                              'your answers.\n'
+                              f'Now, the question is: {retrieval_question}\n\n'
+                              'The document given to you by the user'
+                              f' is {context}\n\n')
+                else:
+                    raise ValueError('Unsupported position. '
+                                     'Position must be "End" or "Start".')
            else:
                raise ValueError(f"Language '{language}' is not supported.")

--- a/opencompass/datasets/needlebench/origin.py
+++ b/opencompass/datasets/needlebench/origin.py
@ -45,6 +45,7 @@ class NeedleBenchOriginDataset(BaseDataset):
        guide: bool,
        language: str,
        needle_file_name: str,
+        position: str = 'End',
    ):
        data = {'prompt': [], 'answer': []}
        tokenizer = tiktoken.encoding_for_model(tokenizer_model)
@ -85,19 +86,42 @@ class NeedleBenchOriginDataset(BaseDataset):
                    retrieval_question)

            if language == 'Chinese':
-                prompt = ('你是一个善于回答用户问题的智能AI助手\n'
-                          '请保持你的回答简洁清楚。不要说和下面文档中的无关的话'
-                          '，或重复你的回答\n'
-                          f'用户现在给你的文档是{context}\n\n'
-                          f'现在请问：{retrieval_question}')
+                if position == 'End':  # question follows the document
+                    prompt = ('你是一个善于回答用户问题的智能AI助手\n'
+                              '请保持你的回答简洁清楚。不要说和下面文档中的无关的话'
+                              '，或重复你的回答\n'
+                              f'用户现在给你的文档是{context}\n\n'
+                              f'现在请问：{retrieval_question}')
+                elif position == 'Start':  # question precedes the document
+                    prompt = ('你是一个善于回答用户问题的智能AI助手\n'
+                              '请保持你的回答简洁清楚。不要说和下面文档中的无关的话'
+                              '，或重复你的回答\n'
+                              f'现在请问：{retrieval_question}\n\n'
+                              f'用户现在给你的文档是{context}\n\n')
+                else:
+                    raise ValueError('Unsupported position. '
+                                     'Position must be "End" or "Start".')
            elif language == 'English':
-                prompt = ('You are an intelligent AI assistant skilled in '
-                          'answering user questions.\n'
-                          'Please keep your answers concise and clear. Do not'
-                          ' talk about irrelevant topics or repeat your '
-                          'answers.\n'
-                          f'The document given to you by the user is {context}'
-                          f'\n\nNow, the question is: {retrieval_question}')
+                if position == 'End':  # question follows the document
+                    prompt = ('You are an intelligent AI assistant skilled in '
+                              'answering user questions.\n'
+                              'Please keep your answers concise and clear. Do '
+                              'not talk about irrelevant topics or repeat '
+                              'your answers.\nThe document '
+                              f'given to you by the user is {context}\n\n'
+                              f'Now, the question is: {retrieval_question}')
+                elif position == 'Start':  # question precedes the document
+                    prompt = ('You are an intelligent AI assistant skilled in '
+                              'answering user questions.\n'
+                              'Please keep your answers concise and clear. Do '
+                              'not talk about irrelevant topics or repeat '
+                              'your answers.\n'
+                              f'Now, the question is: {retrieval_question}\n\n'
+                              'The document given to you by the user'
+                              f' is {context}\n\n')
+                else:
+                    raise ValueError('Unsupported position. '
+                                     'Position must be "End" or "Start".')
            else:
                raise ValueError(f"Language '{language}' is not supported.")

--- a/opencompass/datasets/needlebench/parallel.py
+++ b/opencompass/datasets/needlebench/parallel.py
@ -67,6 +67,7 @@ class NeedleBenchParallelDataset(BaseDataset):
        length_buffer: int,
        guide: bool,
        language: str,
+        position: str = 'End',
    ):
        data = {'prompt': [], 'answer': []}
        tokenizer = tiktoken.encoding_for_model(tokenizer_model)
@ -134,20 +135,44 @ class NeedleBenchParallelDataset(BaseDataset):
                    retrieval_question)

            if language == 'Chinese':
-                prompt = ('你是一个善于回答用户问题的智能AI助手\n'
-                          '请保持你的回答简洁清楚。不要说和下面文档中的无关的话'
-                          '，或重复你的回答\n请先仔细阅读下面的文档再依次回答'
-                          f'最后提出的问题\n用户现在给你的文档是{context}\n\n'
-                          f'现在请问：{retrieval_question}\n')
+                if position == 'End':  # questions follow the document
+                    prompt = ('你是一个善于回答用户问题的智能AI助手\n'
+                              '请保持你的回答简洁清楚。不要说和下面文档中的无关的话'
+                              '，或重复你的回答\n请先仔细阅读下面的文档再依次回答'
+                              f'最后提出的问题\n用户现在给你的文档是{context}\n\n'
+                              f'现在请问：{retrieval_question}\n')
+                elif position == 'Start':  # questions precede the document
+                    prompt = ('你是一个善于回答用户问题的智能AI助手\n'
+                              '请保持你的回答简洁清楚。不要说和下面文档中的无关的话'
+                              '，或重复你的回答\n请先仔细阅读下面的文档再依次回答'
+                              f'最后提出的问题\n现在请问：{retrieval_question}\n\n'
+                              f'用户现在给你的文档是{context}\n')
+                else:
+                    raise ValueError('Unsupported position. '
+                                     'Position must be "End" or "Start".')
+
            elif language == 'English':
-                prompt = (
-                    'You are an intelligent AI assistant skilled in '
-                    'answering user questions.\n'
-                    'Please keep your answers concise and clear. Do not'
-                    ' talk about irrelevant topics or repeat your '
-                    'answers.\n'
-                    f'The document given to you by the user is {context}'
-                    f'\n\nNow, the questions are: {retrieval_question}\n')
+                if position == 'End':  # questions follow the document
+                    prompt = (
+                        'You are an intelligent AI assistant skilled in '
+                        'answering user questions.\n'
+                        'Please keep your answers concise and clear. Do not'
+                        ' talk about irrelevant topics or repeat your '
+                        'answers.\n'
+                        f'The document given to you by the user is {context}'
+                        f'\n\nNow, the questions are: {retrieval_question}\n')
+                elif position == 'Start':  # questions precede the document
+                    prompt = (
+                        'You are an intelligent AI assistant skilled in '
+                        'answering user questions.\n'
+                        'Please keep your answers concise and clear. Do not'
+                        ' talk about irrelevant topics or repeat your '
+                        'answers.\n'
+                        f'\nNow, the questions are: {retrieval_question}\n\n'
+                        f'The document given to you by the user is {context}')
+                else:
+                    raise ValueError('Unsupported position. '
+                                     'Position must be "End" or "Start".')
            else:
                raise ValueError(f"Language '{language}' is not supported.")