From f8e41dfeb427c237a14eb956db0ac3335018e91f Mon Sep 17 00:00:00 2001 From: Mor-Li <2568818204@qq.com> Date: Sun, 27 Apr 2025 16:36:59 +0800 Subject: [PATCH] [Docs] fix needlebench examples --- .../advanced_guides/needleinahaystack_eval.md | 8 ++--- .../advanced_guides/needleinahaystack_eval.md | 8 ++--- examples/eval_needlebench.py | 32 +++++++++---------- 3 files changed, 23 insertions(+), 25 deletions(-) diff --git a/docs/en/advanced_guides/needleinahaystack_eval.md b/docs/en/advanced_guides/needleinahaystack_eval.md index 909f5cb3..7999498c 100644 --- a/docs/en/advanced_guides/needleinahaystack_eval.md +++ b/docs/en/advanced_guides/needleinahaystack_eval.md @@ -82,22 +82,22 @@ pip install vllm #### Evaluating Other `Huggingface` Models -For other models, it's recommended to create a custom config file to adjust `max_seq_len` and `max_out_len`, ensuring the model can process the full context. Here is an example (`configs/eval_needlebench.py`): +For other models, it's recommended to create a custom config file to adjust `max_seq_len` and `max_out_len`, ensuring the model can process the full context. Here is an example (`examples/eval_needlebench.py`): ```python from mmengine.config import read_base # we use mmengine.config to import other config files with read_base(): - from .models.hf_internlm.hf_internlm2_chat_7b import models as internlm2_chat_7b + from opencompass.configs.models.hf_internlm.hf_internlm2_chat_7b import models as internlm2_chat_7b # Evaluate needlebench_32k, adjust the configuration to use 4k, 32k, 128k, 200k, or 1000k if necessary. 
# from .datasets.needlebench.needlebench_32k.needlebench_32k import needlebench_datasets # from .summarizers.needlebench import needlebench_32k_summarizer as summarizer # only eval original "needle in a haystack test" in needlebench_32k - from .datasets.needlebench.needlebench_32k.needlebench_single_32k import needlebench_zh_datasets, needlebench_en_datasets - from .summarizers.needlebench import needlebench_32k_summarizer as summarizer + from opencompass.configs.datasets.needlebench.needlebench_32k.needlebench_single_32k import needlebench_zh_datasets, needlebench_en_datasets + from opencompass.configs.summarizers.needlebench import needlebench_32k_summarizer as summarizer # eval Ancestral Tracing Challenge(ATC) # from .datasets.needlebench.atc.atc_0shot_nocot_2_power_en import needlebench_datasets diff --git a/docs/zh_cn/advanced_guides/needleinahaystack_eval.md b/docs/zh_cn/advanced_guides/needleinahaystack_eval.md index ff18b2d4..e807c336 100644 --- a/docs/zh_cn/advanced_guides/needleinahaystack_eval.md +++ b/docs/zh_cn/advanced_guides/needleinahaystack_eval.md @@ -85,22 +85,22 @@ pip install vllm #### 评估其他`Huggingface`模型 -对于其他模型,我们建议额外书写一个运行的配置文件以便对模型的`max_seq_len`, `max_out_len`参数进行修改,以便模型可以接收到完整的长文本内容。如这里的的`configs/eval_needlebench.py`文件。完整内容如下 +对于其他模型,我们建议额外书写一个运行的配置文件以便对模型的`max_seq_len`, `max_out_len`参数进行修改,以便模型可以接收到完整的长文本内容。如这里的`examples/eval_needlebench.py`文件。完整内容如下 ```python from mmengine.config import read_base # we use mmengine.config to import other config files with read_base(): - from .models.hf_internlm.hf_internlm2_chat_7b import models as internlm2_chat_7b + from opencompass.configs.models.hf_internlm.hf_internlm2_chat_7b import models as internlm2_chat_7b # Evaluate needlebench_32k, adjust the configuration to use 4k, 32k, 128k, 200k, or 1000k if necessary. 
# from .datasets.needlebench.needlebench_32k.needlebench_32k import needlebench_datasets # from .summarizers.needlebench import needlebench_32k_summarizer as summarizer # only eval original "needle in a haystack test" in needlebench_32k - from .datasets.needlebench.needlebench_32k.needlebench_single_32k import needlebench_zh_datasets, needlebench_en_datasets - from .summarizers.needlebench import needlebench_32k_summarizer as summarizer + from opencompass.configs.datasets.needlebench.needlebench_32k.needlebench_single_32k import needlebench_zh_datasets, needlebench_en_datasets + from opencompass.configs.summarizers.needlebench import needlebench_32k_summarizer as summarizer # eval Ancestral Tracing Challenge(ATC) # from .datasets.needlebench.atc.atc_0shot_nocot_2_power_en import needlebench_datasets diff --git a/examples/eval_needlebench.py b/examples/eval_needlebench.py index e471233e..e2a4d75d 100644 --- a/examples/eval_needlebench.py +++ b/examples/eval_needlebench.py @@ -1,29 +1,27 @@ from mmengine.config import read_base +# we use mmengine.config to import other config files with read_base(): - # Evaluate needlebench_4k, adjust the configuration to use 8k, 32k, 128k, 200k, or 1000k if necessary. 
- # from opencompass.configs.datasets.needlebench.needlebench_4k.needlebench_4k import needlebench_datasets - # from opencompass.configs.summarizers.needlebench import needlebench_4k_summarizer as summarizer - # only eval original "needle in a haystack test" in needlebench_4k - from opencompass.configs.datasets.needlebench.needlebench_4k.needlebench_single_4k import ( - needlebench_en_datasets, needlebench_zh_datasets) - from opencompass.configs.models.hf_internlm.hf_internlm2_chat_7b import \ - models as internlm2_chat_7b - from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_chat_7b import \ - models as internlm2_chat_7b_200k - from opencompass.configs.summarizers.needlebench import \ - needlebench_4k_summarizer as summarizer + from opencompass.configs.models.hf_internlm.hf_internlm2_chat_7b import models as internlm2_chat_7b + + # Evaluate needlebench_32k, adjust the configuration to use 4k, 32k, 128k, 200k, or 1000k if necessary. + # from opencompass.configs.datasets.needlebench.needlebench_32k.needlebench_32k import needlebench_datasets + # from opencompass.configs.summarizers.needlebench import needlebench_32k_summarizer as summarizer + + # only eval original "needle in a haystack test" in needlebench_32k + from opencompass.configs.datasets.needlebench.needlebench_32k.needlebench_single_32k import needlebench_zh_datasets, needlebench_en_datasets + from opencompass.configs.summarizers.needlebench import needlebench_32k_summarizer as summarizer # eval Ancestral Tracing Challenge(ATC) - # from opencompass.configs.datasets.needlebench.atc.atc_choice_50 import needlebench_datasets - # from opencompass.configs.summarizers.needlebench import atc_summarizer_50 as summarizer + # from opencompass.configs.datasets.needlebench.atc.atc_0shot_nocot_2_power_en import needlebench_datasets + # ATC uses the default summarizer, so no summarizer import is needed datasets = sum([v for k, v in locals().items() if ('datasets' in k)], []) for m in internlm2_chat_7b: - m['max_seq_len'] = 32768 # Ensure InternLM2-7B model can receive 
the full length of long texts, adjust for other models based on their supported maximum sequence length. - m['max_out_len'] = 2000 # Ensure complete responses from the model in multi-needle retrieval tasks. + m['max_seq_len'] = 32768 # Ensure InternLM2-7B model can receive the full length of long texts, adjust for other models based on their supported maximum sequence length. + m['max_out_len'] = 4096 # Ensure complete responses from the model in multi-needle retrieval tasks. models = internlm2_chat_7b work_dir = './outputs/needlebench'