from mmengine.config import read_base

# All config imports live inside `read_base()`; the dataset, model and
# summarizer variables bound here are consumed by the statements below.
with read_base():
    # Evaluate needlebench_4k; adjust the configuration to use 8k, 32k, 128k,
    # 200k, or 1000k if necessary.
    # from opencompass.configs.datasets.needlebench.needlebench_4k.needlebench_4k import needlebench_datasets
    # from opencompass.configs.summarizers.needlebench import needlebench_4k_summarizer as summarizer

    # Only evaluate the original "needle in a haystack" test in needlebench_4k.
    from opencompass.configs.datasets.needlebench.needlebench_4k.needlebench_single_4k import (
        needlebench_en_datasets, needlebench_zh_datasets)
    from opencompass.configs.models.hf_internlm.hf_internlm2_chat_7b import \
        models as internlm2_chat_7b
    # NOTE(review): `internlm2_chat_7b_200k` is not referenced by the visible
    # part of this file; presumably kept so it can be swapped in as `models`
    # -- confirm before removing.
    from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_chat_7b import \
        models as internlm2_chat_7b_200k
    from opencompass.configs.summarizers.needlebench import \
        needlebench_4k_summarizer as summarizer

    # Evaluate the Ancestral Tracing Challenge (ATC) instead.
    # from opencompass.configs.datasets.needlebench.atc.atc_choice_50 import needlebench_datasets
    # from opencompass.configs.summarizers.needlebench import atc_summarizer_50 as summarizer
# Concatenate every list bound to a name containing 'datasets' (for example
# the `*_datasets` variables imported above) into one flat `datasets` list.
# The substring must not be padded with spaces: no variable name contains
# spaces, so a padded filter would always produce an empty list.
datasets = sum([v for k, v in locals().items() if 'datasets' in k], [])
# Override the length limits on every imported InternLM2-Chat-7B model config.
for m in internlm2_chat_7b:
    # Ensure InternLM2-7B model can receive the full length of long texts;
    # adjust for other models based on their supported maximum sequence length.
    m['max_seq_len'] = 32768
    # Ensure complete responses from the model in multi-needle retrieval tasks.
    m['max_out_len'] = 2000

# Models to evaluate: the (now patched) InternLM2-Chat-7B configs.
models = internlm2_chat_7b
# Work directory for this NeedleBench run (no surrounding spaces in the path,
# otherwise the first path component becomes a directory literally named ' .').
work_dir = './outputs/needlebench'